From 540e2a052bf58110b3d2e0f3ba9329ca7fd6d929 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Wed, 19 Nov 2025 17:06:06 -0600 Subject: [PATCH 1/8] improve performance on perfect shredding --- .../benches/variant_kernels.rs | 52 +++++++++++++- parquet-variant-compute/src/variant_get.rs | 68 ++++++++++++++++++- 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/parquet-variant-compute/benches/variant_kernels.rs b/parquet-variant-compute/benches/variant_kernels.rs index 4526713570be..87a1e3defb5b 100644 --- a/parquet-variant-compute/benches/variant_kernels.rs +++ b/parquet-variant-compute/benches/variant_kernels.rs @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{Array, ArrayRef, StringArray}; +use arrow::array::{Array, ArrayRef, BinaryViewArray, StringArray, StructArray}; use arrow::util::test_util::seedable_rng; +use arrow_schema::{DataType, Field, FieldRef, Fields}; use criterion::{Criterion, criterion_group, criterion_main}; -use parquet_variant::{Variant, VariantBuilder}; +use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, Variant, VariantBuilder}; use parquet_variant_compute::{ GetOptions, VariantArray, VariantArrayBuilder, json_to_variant, variant_get, }; @@ -98,9 +99,26 @@ pub fn variant_get_bench(c: &mut Criterion) { }); } +pub fn variant_get_shredded_utf8_bench(c: &mut Criterion) { + let variant_array = create_shredded_utf8_variant_array(8192); + let input = ArrayRef::from(variant_array); + + let field: FieldRef = Arc::new(Field::new("typed_value", DataType::Utf8, true)); + let options = GetOptions { + path: vec![].into(), + as_type: Some(field), + cast_options: Default::default(), + }; + + c.bench_function("variant_get_shredded_utf8", |b| { + b.iter(|| variant_get(&input.clone(), options.clone())) + }); +} + criterion_group!( benches, variant_get_bench, + variant_get_shredded_utf8_bench, benchmark_batch_json_string_to_variant ); criterion_main!(benches); @@ -121,6 +139,36 @@ fn create_primitive_variant_array(size: usize) -> VariantArray { variant_builder.build() } +/// Creates a `VariantArray` where the values are already shredded as UTF8. +fn create_shredded_utf8_variant_array(size: usize) -> VariantArray { + let metadata = BinaryViewArray::from_iter_values( + std::iter::repeat(EMPTY_VARIANT_METADATA_BYTES).take(size), + ); + let typed_value = StringArray::from_iter_values((0..size).map(|i| format!("value_{i}"))); + + let metadata_ref: ArrayRef = Arc::new(metadata); + let typed_value_ref: ArrayRef = Arc::new(typed_value); + + let fields = Fields::from(vec![ + Arc::new(Field::new( + "metadata", + metadata_ref.data_type().clone(), + false, + )), + Arc::new(Field::new( + "typed_value", + typed_value_ref.data_type().clone(), + true, + )), + ]); + + let struct_array = StructArray::new(fields, vec![metadata_ref, typed_value_ref], None); + let struct_array_ref: ArrayRef = Arc::new(struct_array); + + VariantArray::try_new(struct_array_ref.as_ref()) + .expect("created struct should be a valid shredded variant") +} + /// Return an iterator off JSON strings, each representing a person /// with random first name, last name, and age. /// diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 82f02c3df848..6b6050bc98a8 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. use arrow::{ - array::{self, Array, ArrayRef, BinaryViewArray, StructArray}, + array::{self, Array, ArrayRef, BinaryViewArray, StructArray, make_array}, + buffer::NullBuffer, compute::CastOptions, datatypes::Field, error::Result, @@ -109,6 +110,30 @@ pub(crate) fn follow_shredded_path_element<'a>( } } +/// Returns a cloned `ArrayRef` whose null mask is the union of the array's existing mask and +/// `parent_nulls`. If `parent_nulls` is `None` or contains no nulls, the original array is returned. +/// +/// This necessary because the null of the shredded value is the union of parent null and current nulls. +fn clone_with_parent_nulls( + array: &ArrayRef, + parent_nulls: Option<&NullBuffer>, +) -> Result { + let Some(parent_nulls) = parent_nulls else { + return Ok(array.clone()); + }; + if parent_nulls.null_count() == 0 { + return Ok(array.clone()); + } + + let combined_nulls = NullBuffer::union(array.as_ref().nulls(), Some(parent_nulls)); + let data = array + .to_data() + .into_builder() + .nulls(combined_nulls) + .build()?; + Ok(make_array(data)) +} + /// Follows the given path as far as possible through shredded variant fields. If the path ends on a /// shredded field, return it directly. Otherwise, use a row shredder to follow the rest of the path /// and extract the requested value on a per-row basis. @@ -208,6 +233,21 @@ fn shredded_get_path( return Ok(ArrayRef::from(target)); }; + // Try to return the typed value directly when we have a perfect shredding match. + if !matches!(as_field.data_type(), DataType::Struct(_)) { + if let Some(typed_value) = target.typed_value_field() { + let types_match = typed_value.data_type() == as_field.data_type(); + let value_not_present = target.value_field().is_none(); + // this is a perfect shredding, where the value is entirely shredded out, so we can just return the typed value + // note that we MUST check value_not_present, because some of the `typed_value` might be null but data is present in the `value` column. + // an alternative is to count whether typed_value has any non-nulls, or check every row in `value` is null, + // but this is too complicated and might be slow. + if types_match && value_not_present { + return clone_with_parent_nulls(typed_value, target.nulls()); + } + } + } + // Structs are special. Recurse into each field separately, hoping to follow the shredding even // further, and build up the final struct from those individually shredded results. if let DataType::Struct(fields) = as_field.data_type() { @@ -604,6 +644,32 @@ mod test { assert_eq!(&result, &expected) } + // on a perfect shredding, we must still obey the parent nulls + #[test] + fn get_variant_shredded_utf8_respects_parent_nulls() { + let metadata = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 2)); + let typed_value = Arc::new(StringArray::from(vec![Some("foo"), Some("bar")])); + let parent_nulls = NullBuffer::from(vec![false, true]); + + let variant_array = VariantArray::from_parts( + metadata, + None, + Some(typed_value.clone()), + Some(parent_nulls), + ); + let input: ArrayRef = ArrayRef::from(variant_array); + + let field = Field::new("result", DataType::Utf8, true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&input, options).unwrap(); + let strings = result.as_any().downcast_ref::().unwrap(); + + assert_eq!(strings.len(), 2); + assert!(strings.is_null(0)); + assert_eq!(strings.value(1), "bar"); + } + /// Shredding: extract a value as an Int32Array, unsafe cast (should error on "n/a") #[test] fn get_variant_shredded_int32_as_int32_unsafe_cast() { From 615386e7614d303d7194187af806c54733486c84 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Wed, 19 Nov 2025 17:20:08 -0600 Subject: [PATCH 2/8] clippy --- parquet-variant-compute/benches/variant_kernels.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/parquet-variant-compute/benches/variant_kernels.rs b/parquet-variant-compute/benches/variant_kernels.rs index 87a1e3defb5b..13ff77d9fb18 100644 --- a/parquet-variant-compute/benches/variant_kernels.rs +++ b/parquet-variant-compute/benches/variant_kernels.rs @@ -141,9 +141,8 @@ fn create_primitive_variant_array(size: usize) -> VariantArray { /// Creates a `VariantArray` where the values are already shredded as UTF8. fn create_shredded_utf8_variant_array(size: usize) -> VariantArray { - let metadata = BinaryViewArray::from_iter_values( - std::iter::repeat(EMPTY_VARIANT_METADATA_BYTES).take(size), - ); + let metadata = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, size)); let typed_value = StringArray::from_iter_values((0..size).map(|i| format!("value_{i}"))); let metadata_ref: ArrayRef = Arc::new(metadata); From 75c43f36cae5e82da12d36d7f001ad959ad827e2 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Fri, 21 Nov 2025 13:31:49 -0600 Subject: [PATCH 3/8] update comments --- parquet-variant-compute/src/variant_get.rs | 68 ++++++++++------------ 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 6b6050bc98a8..2b670f10c29f 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -16,7 +16,6 @@ // under the License. use arrow::{ array::{self, Array, ArrayRef, BinaryViewArray, StructArray, make_array}, - buffer::NullBuffer, compute::CastOptions, datatypes::Field, error::Result, @@ -110,30 +109,6 @@ pub(crate) fn follow_shredded_path_element<'a>( } } -/// Returns a cloned `ArrayRef` whose null mask is the union of the array's existing mask and -/// `parent_nulls`. If `parent_nulls` is `None` or contains no nulls, the original array is returned. -/// -/// This necessary because the null of the shredded value is the union of parent null and current nulls. -fn clone_with_parent_nulls( - array: &ArrayRef, - parent_nulls: Option<&NullBuffer>, -) -> Result { - let Some(parent_nulls) = parent_nulls else { - return Ok(array.clone()); - }; - if parent_nulls.null_count() == 0 { - return Ok(array.clone()); - } - - let combined_nulls = NullBuffer::union(array.as_ref().nulls(), Some(parent_nulls)); - let data = array - .to_data() - .into_builder() - .nulls(combined_nulls) - .build()?; - Ok(make_array(data)) -} - /// Follows the given path as far as possible through shredded variant fields. If the path ends on a /// shredded field, return it directly. Otherwise, use a row shredder to follow the rest of the path /// and extract the requested value on a per-row basis. @@ -234,18 +209,8 @@ fn shredded_get_path( }; // Try to return the typed value directly when we have a perfect shredding match. - if !matches!(as_field.data_type(), DataType::Struct(_)) { - if let Some(typed_value) = target.typed_value_field() { - let types_match = typed_value.data_type() == as_field.data_type(); - let value_not_present = target.value_field().is_none(); - // this is a perfect shredding, where the value is entirely shredded out, so we can just return the typed value - // note that we MUST check value_not_present, because some of the `typed_value` might be null but data is present in the `value` column. - // an alternative is to count whether typed_value has any non-nulls, or check every row in `value` is null, - // but this is too complicated and might be slow. - if types_match && value_not_present { - return clone_with_parent_nulls(typed_value, target.nulls()); - } - } + if let Some(shredded) = try_perfect_shredding(&target, as_field)? { + return Ok(shredded); } // Structs are special. Recurse into each field separately, hoping to follow the shredding even @@ -276,6 +241,35 @@ fn shredded_get_path( shred_basic_variant(target, VariantPath::default(), Some(as_field)) } +fn try_perfect_shredding( + variant_array: &VariantArray, + as_field: &Field, +) -> Result> { + // Try to return the typed value directly when we have a perfect shredding match. + if !matches!(as_field.data_type(), DataType::Struct(_)) + && let Some(typed_value) = variant_array.typed_value_field() + && typed_value.data_type() == as_field.data_type() + && variant_array + .value_field() + .is_none_or(|v| v.null_count() == v.len()) + { + // Here we need to gate against the case where the `typed_value` is null but data is in the `value` column. + // 1. If the `value` column is null, or + // 2. If every row in the `value` column is null + + // This is a perfect shredding, where the value is entirely shredded out, + // so we can just return the typed value. + // We need to inherit the nulls from the parent array. + let data = typed_value + .to_data() + .into_builder() + .nulls(variant_array.nulls().cloned()) + .build()?; + return Ok(Some(make_array(data))); + } + Ok(None) +} + /// Returns an array with the specified path extracted from the variant values. /// /// The return array type depends on the `as_type` field of the options parameter From b50e21c1e78f4bc7d02bd8d0fd1edd68f281c734 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Fri, 21 Nov 2025 14:22:52 -0600 Subject: [PATCH 4/8] combine null again --- parquet-variant-compute/src/variant_get.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 2b670f10c29f..88e8b47d63de 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -259,11 +259,14 @@ fn try_perfect_shredding( // This is a perfect shredding, where the value is entirely shredded out, // so we can just return the typed value. - // We need to inherit the nulls from the parent array. + + let parent_nulls = variant_array.nulls(); + let current_nulls = typed_value.nulls(); + let combined_nulls = arrow::buffer::NullBuffer::union(parent_nulls, current_nulls); let data = typed_value .to_data() .into_builder() - .nulls(variant_array.nulls().cloned()) + .nulls(combined_nulls) .build()?; return Ok(Some(make_array(data))); } From 40f2b89e174beb7f856e4b9675ecd574d6bfcce4 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Sun, 23 Nov 2025 16:33:44 -0600 Subject: [PATCH 5/8] update --- parquet-variant-compute/src/variant_get.rs | 50 +++------------------- 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 88e8b47d63de..d5cb7074c1ef 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. use arrow::{ - array::{self, Array, ArrayRef, BinaryViewArray, StructArray, make_array}, + array::{self, Array, ArrayRef, BinaryViewArray, StructArray}, compute::CastOptions, datatypes::Field, error::Result, @@ -209,7 +209,7 @@ fn shredded_get_path( }; // Try to return the typed value directly when we have a perfect shredding match. - if let Some(shredded) = try_perfect_shredding(&target, as_field)? { + if let Some(shredded) = try_perfect_shredding(&target, as_field) { return Ok(shredded); } @@ -241,10 +241,7 @@ fn shredded_get_path( shred_basic_variant(target, VariantPath::default(), Some(as_field)) } -fn try_perfect_shredding( - variant_array: &VariantArray, - as_field: &Field, -) -> Result> { +fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Option { // Try to return the typed value directly when we have a perfect shredding match. if !matches!(as_field.data_type(), DataType::Struct(_)) && let Some(typed_value) = variant_array.typed_value_field() @@ -259,18 +256,9 @@ fn try_perfect_shredding( // This is a perfect shredding, where the value is entirely shredded out, // so we can just return the typed value. - - let parent_nulls = variant_array.nulls(); - let current_nulls = typed_value.nulls(); - let combined_nulls = arrow::buffer::NullBuffer::union(parent_nulls, current_nulls); - let data = typed_value - .to_data() - .into_builder() - .nulls(combined_nulls) - .build()?; - return Ok(Some(make_array(data))); - } - Ok(None) + return Some(typed_value.clone()); + } + None } /// Returns an array with the specified path extracted from the variant values. @@ -641,32 +629,6 @@ mod test { assert_eq!(&result, &expected) } - // on a perfect shredding, we must still obey the parent nulls - #[test] - fn get_variant_shredded_utf8_respects_parent_nulls() { - let metadata = - BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 2)); - let typed_value = Arc::new(StringArray::from(vec![Some("foo"), Some("bar")])); - let parent_nulls = NullBuffer::from(vec![false, true]); - - let variant_array = VariantArray::from_parts( - metadata, - None, - Some(typed_value.clone()), - Some(parent_nulls), - ); - let input: ArrayRef = ArrayRef::from(variant_array); - - let field = Field::new("result", DataType::Utf8, true); - let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); - let result = variant_get(&input, options).unwrap(); - let strings = result.as_any().downcast_ref::().unwrap(); - - assert_eq!(strings.len(), 2); - assert!(strings.is_null(0)); - assert_eq!(strings.value(1), "bar"); - } - /// Shredding: extract a value as an Int32Array, unsafe cast (should error on "n/a") #[test] fn get_variant_shredded_int32_as_int32_unsafe_cast() { From 79d15f2a37291d07d2c7c2183e153fb184e2ade3 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Sun, 23 Nov 2025 16:39:08 -0600 Subject: [PATCH 6/8] add a ptr eq test --- parquet-variant-compute/src/variant_get.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index d5cb7074c1ef..55236b636940 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -3824,4 +3824,18 @@ mod test { assert!(result.is_null(i)); } } + + #[test] + fn test_perfect_shredding_returns_same_arc_ptr() { + let variant_array = perfectly_shredded_int32_variant_array(); + + let variant_array_ref = VariantArray::try_new(&variant_array).unwrap(); + let typed_value_arc = variant_array_ref.typed_value_field().unwrap().clone(); + + let field = Field::new("result", DataType::Int32, true); + let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); + let result = variant_get(&variant_array, options).unwrap(); + + assert!(Arc::ptr_eq(&typed_value_arc, &result)); + } } From cee84bd84eb2a9c0443092c45dd204faa19eef50 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Sun, 23 Nov 2025 17:10:09 -0600 Subject: [PATCH 7/8] fmt, msrv --- parquet-variant-compute/src/variant_get.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 55236b636940..3bfba7b68237 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -243,8 +243,10 @@ fn shredded_get_path( fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Option { // Try to return the typed value directly when we have a perfect shredding match. - if !matches!(as_field.data_type(), DataType::Struct(_)) - && let Some(typed_value) = variant_array.typed_value_field() + if matches!(as_field.data_type(), DataType::Struct(_)) { + return None; + } + if let Some(typed_value) = variant_array.typed_value_field() && typed_value.data_type() == as_field.data_type() && variant_array .value_field() @@ -3828,14 +3830,14 @@ mod test { #[test] fn test_perfect_shredding_returns_same_arc_ptr() { let variant_array = perfectly_shredded_int32_variant_array(); - + let variant_array_ref = VariantArray::try_new(&variant_array).unwrap(); let typed_value_arc = variant_array_ref.typed_value_field().unwrap().clone(); - + let field = Field::new("result", DataType::Int32, true); let options = GetOptions::new().with_as_type(Some(FieldRef::from(field))); let result = variant_get(&variant_array, options).unwrap(); - + assert!(Arc::ptr_eq(&typed_value_arc, &result)); } } From 0eb317d235f9b19cedd8481356bd03bc40e1d8c2 Mon Sep 17 00:00:00 2001 From: XiangpengHao Date: Wed, 26 Nov 2025 09:54:07 -0600 Subject: [PATCH 8/8] add test and msrv --- parquet-variant-compute/src/variant_get.rs | 123 ++++++++++++++++++++- 1 file changed, 121 insertions(+), 2 deletions(-) diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 3bfba7b68237..fc1f46245c9d 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -246,8 +246,8 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti if matches!(as_field.data_type(), DataType::Struct(_)) { return None; } - if let Some(typed_value) = variant_array.typed_value_field() - && typed_value.data_type() == as_field.data_type() + let typed_value = variant_array.typed_value_field()?; + if typed_value.data_type() == as_field.data_type() && variant_array .value_field() .is_none_or(|v| v.null_count() == v.len()) @@ -3840,4 +3840,123 @@ mod test { assert!(Arc::ptr_eq(&typed_value_arc, &result)); } + + #[test] + fn test_perfect_shredding_three_typed_value_columns() { + // Column 1: perfectly shredded primitive with all nulls + let all_nulls_values: Arc = Arc::new(Int32Array::from(vec![ + Option::::None, + Option::::None, + Option::::None, + ])); + let all_nulls_erased: ArrayRef = all_nulls_values.clone(); + let all_nulls_field = + ShreddedVariantFieldArray::from_parts(None, Some(all_nulls_erased.clone()), None); + let all_nulls_type = all_nulls_field.data_type().clone(); + let all_nulls_struct: ArrayRef = ArrayRef::from(all_nulls_field); + + // Column 2: perfectly shredded primitive with some nulls + let some_nulls_values: Arc = + Arc::new(Int32Array::from(vec![Some(10), None, Some(30)])); + let some_nulls_erased: ArrayRef = some_nulls_values.clone(); + let some_nulls_field = + ShreddedVariantFieldArray::from_parts(None, Some(some_nulls_erased.clone()), None); + let some_nulls_type = some_nulls_field.data_type().clone(); + let some_nulls_struct: ArrayRef = ArrayRef::from(some_nulls_field); + + // Column 3: perfectly shredded nested struct + let inner_values: Arc = + Arc::new(Int32Array::from(vec![Some(111), None, Some(333)])); + let inner_erased: ArrayRef = inner_values.clone(); + let inner_field = + ShreddedVariantFieldArray::from_parts(None, Some(inner_erased.clone()), None); + let inner_field_type = inner_field.data_type().clone(); + let inner_struct_array: ArrayRef = ArrayRef::from(inner_field); + + let nested_struct = Arc::new( + StructArray::try_new( + Fields::from(vec![Field::new("inner", inner_field_type, true)]), + vec![inner_struct_array], + None, + ) + .unwrap(), + ); + let nested_struct_erased: ArrayRef = nested_struct.clone(); + let struct_field = + ShreddedVariantFieldArray::from_parts(None, Some(nested_struct_erased.clone()), None); + let struct_field_type = struct_field.data_type().clone(); + let struct_field_struct: ArrayRef = ArrayRef::from(struct_field); + + // Assemble the top-level typed_value struct with the three columns above + let typed_value_struct = StructArray::try_new( + Fields::from(vec![ + Field::new("all_nulls", all_nulls_type, true), + Field::new("some_nulls", some_nulls_type, true), + Field::new("struct_field", struct_field_type, true), + ]), + vec![all_nulls_struct, some_nulls_struct, struct_field_struct], + None, + ) + .unwrap(); + + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( + EMPTY_VARIANT_METADATA_BYTES, + all_nulls_values.len(), + )); + let variant_struct = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata), false) + .with_field("typed_value", Arc::new(typed_value_struct), true) + .build(); + let variant_array: ArrayRef = VariantArray::try_new(&variant_struct).unwrap().into(); + + // Case 1: all-null primitive column should reuse the typed_value Arc directly + let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); + let all_nulls_result = variant_get( + &variant_array, + GetOptions::new_with_path(VariantPath::from("all_nulls")) + .with_as_type(Some(all_nulls_field_ref)), + ) + .unwrap(); + assert!(Arc::ptr_eq(&all_nulls_result, &all_nulls_erased)); + + // Case 2: primitive column with some nulls should also reuse its typed_value Arc + let some_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true)); + let some_nulls_result = variant_get( + &variant_array, + GetOptions::new_with_path(VariantPath::from("some_nulls")) + .with_as_type(Some(some_nulls_field_ref)), + ) + .unwrap(); + assert!(Arc::ptr_eq(&some_nulls_result, &some_nulls_erased)); + + // Case 3: struct column should return a StructArray composed from the nested field + let struct_child_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]); + let struct_field_ref = FieldRef::from(Field::new( + "result", + DataType::Struct(struct_child_fields.clone()), + true, + )); + let struct_result = variant_get( + &variant_array, + GetOptions::new_with_path(VariantPath::from("struct_field")) + .with_as_type(Some(struct_field_ref)), + ) + .unwrap(); + let struct_array = struct_result + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(struct_array.len(), 3); + assert_eq!(struct_array.null_count(), 0); + + let inner_values_result = struct_array + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(inner_values_result.len(), 3); + assert_eq!(inner_values_result.value(0), 111); + assert!(inner_values_result.is_null(1)); + assert_eq!(inner_values_result.value(2), 333); + } }