Skip to content

Commit d55fb6d

Browse files
authored
Support FixedSizeList for array_to_string (#17666)
1 parent c2d839f commit d55fb6d

File tree

3 files changed

+68
-49
lines changed

3 files changed

+68
-49
lines changed

datafusion/common/src/cast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ pub fn as_fixed_size_list_array(array: &dyn Array) -> Result<&FixedSizeListArray
313313
Ok(downcast_value!(array, FixedSizeListArray))
314314
}
315315

316-
// Downcast Array to FixedSizeListArray
316+
// Downcast Array to FixedSizeBinaryArray
317317
pub fn as_fixed_size_binary_array(array: &dyn Array) -> Result<&FixedSizeBinaryArray> {
318318
Ok(downcast_value!(array, FixedSizeBinaryArray))
319319
}

datafusion/functions-nested/src/string.rs

Lines changed: 52 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ use arrow::array::{
2525
};
2626
use arrow::datatypes::{DataType, Field};
2727

28-
use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
28+
use datafusion_common::utils::ListCoercion;
29+
use datafusion_common::{not_impl_err, DataFusionError, Result};
2930

3031
use std::any::Any;
3132

@@ -39,12 +40,15 @@ use arrow::compute::cast;
3940
use arrow::datatypes::DataType::{
4041
Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8, Utf8View,
4142
};
42-
use datafusion_common::cast::{as_large_list_array, as_list_array};
43+
use datafusion_common::cast::{
44+
as_fixed_size_list_array, as_large_list_array, as_list_array,
45+
};
4346
use datafusion_common::exec_err;
4447
use datafusion_common::types::logical_string;
4548
use datafusion_expr::{
46-
Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature,
47-
TypeSignatureClass, Volatility,
49+
ArrayFunctionArgument, ArrayFunctionSignature, Coercion, ColumnarValue,
50+
Documentation, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass,
51+
Volatility,
4852
};
4953
use datafusion_functions::downcast_arg;
5054
use datafusion_macros::user_doc;
@@ -159,7 +163,26 @@ impl Default for ArrayToString {
159163
impl ArrayToString {
160164
pub fn new() -> Self {
161165
Self {
162-
signature: Signature::variadic_any(Volatility::Immutable),
166+
signature: Signature::one_of(
167+
vec![
168+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
169+
arguments: vec![
170+
ArrayFunctionArgument::Array,
171+
ArrayFunctionArgument::String,
172+
ArrayFunctionArgument::String,
173+
],
174+
array_coercion: Some(ListCoercion::FixedSizedListToList),
175+
}),
176+
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
177+
arguments: vec![
178+
ArrayFunctionArgument::Array,
179+
ArrayFunctionArgument::String,
180+
],
181+
array_coercion: Some(ListCoercion::FixedSizedListToList),
182+
}),
183+
],
184+
Volatility::Immutable,
185+
),
163186
aliases: vec![
164187
String::from("list_to_string"),
165188
String::from("array_join"),
@@ -182,13 +205,8 @@ impl ScalarUDFImpl for ArrayToString {
182205
&self.signature
183206
}
184207

185-
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
186-
Ok(match arg_types[0] {
187-
List(_) | LargeList(_) | FixedSizeList(_, _) => Utf8,
188-
_ => {
189-
return plan_err!("The array_to_string function can only accept List/LargeList/FixedSizeList.");
190-
}
191-
})
208+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
209+
Ok(Utf8)
192210
}
193211

194212
fn invoke_with_args(
@@ -282,16 +300,10 @@ impl ScalarUDFImpl for StringToArray {
282300
}
283301

284302
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
285-
Ok(match arg_types[0] {
286-
Utf8 | Utf8View | LargeUtf8 => {
287-
List(Arc::new(Field::new_list_field(arg_types[0].clone(), true)))
288-
}
289-
_ => {
290-
return plan_err!(
291-
"The string_to_array function can only accept Utf8, Utf8View or LargeUtf8."
292-
);
293-
}
294-
})
303+
Ok(List(Arc::new(Field::new_list_field(
304+
arg_types[0].clone(),
305+
true,
306+
))))
295307
}
296308

297309
fn invoke_with_args(
@@ -368,6 +380,20 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
368380

369381
Ok(arg)
370382
}
383+
FixedSizeList(..) => {
384+
let list_array = as_fixed_size_list_array(&arr)?;
385+
for i in 0..list_array.len() {
386+
compute_array_to_string(
387+
arg,
388+
list_array.value(i),
389+
delimiter.clone(),
390+
null_string.clone(),
391+
with_null_string,
392+
)?;
393+
}
394+
395+
Ok(arg)
396+
}
371397
LargeList(..) => {
372398
let list_array = as_large_list_array(&arr)?;
373399
for i in 0..list_array.len() {
@@ -449,9 +475,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
449475
Ok(StringArray::from(res))
450476
}
451477

452-
let arr_type = arr.data_type();
453-
let string_arr = match arr_type {
454-
List(_) | FixedSizeList(_, _) => {
478+
let string_arr = match arr.data_type() {
479+
List(_) => {
455480
let list_array = as_list_array(&arr)?;
456481
generate_string_array::<i32>(
457482
list_array,
@@ -469,29 +494,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
469494
with_null_string,
470495
)?
471496
}
472-
_ => {
473-
let mut arg = String::from("");
474-
let mut res: Vec<Option<String>> = Vec::new();
475-
// delimiter length is 1
476-
assert_eq!(delimiters.len(), 1);
477-
let delimiter = delimiters[0].unwrap();
478-
let s = compute_array_to_string(
479-
&mut arg,
480-
Arc::clone(arr),
481-
delimiter.to_string(),
482-
null_string,
483-
with_null_string,
484-
)?
485-
.clone();
486-
487-
if !s.is_empty() {
488-
let s = s.strip_suffix(delimiter).unwrap().to_string();
489-
res.push(Some(s));
490-
} else {
491-
res.push(Some(s));
492-
}
493-
StringArray::from(res)
494-
}
497+
// Defensive fallback: the function signature requires an array as the first argument, so this arm should not be reached
498+
_ => return exec_err!("array_to_string expects list as first argument"),
495499
};
496500

497501
Ok(Arc::new(string_arr))

datafusion/sqllogictest/test_files/array.slt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4809,6 +4809,11 @@ select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'),
48094809
----
48104810
h,e,l,l,o 1-2-3-4-5 1|2|3
48114811

4812+
query TTT
4813+
select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|');
4814+
----
4815+
h,e,l,l,o 1-2-3-4-5 1|2|3
4816+
48124817
# array_to_string scalar function with nulls #2
48134818
query TTT
48144819
select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
@@ -4820,6 +4825,16 @@ select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'Large
48204825
----
48214826
h,-,-,-,o nil-2-nil-4-5 1|0|3
48224827

4828+
query TTT
4829+
select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0');
4830+
----
4831+
h,-,-,-,o nil-2-nil-4-5 1|0|3
4832+
4833+
query T
4834+
select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-');
4835+
----
4836+
-,a,-,-
4837+
48234838
# array_to_string with columns #1
48244839

48254840
# For reference

0 commit comments

Comments
 (0)