Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion/common/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ pub fn as_fixed_size_list_array(array: &dyn Array) -> Result<&FixedSizeListArray
Ok(downcast_value!(array, FixedSizeListArray))
}

// Downcast Array to FixedSizeListArray
// Downcast Array to FixedSizeBinaryArray
pub fn as_fixed_size_binary_array(array: &dyn Array) -> Result<&FixedSizeBinaryArray> {
Ok(downcast_value!(array, FixedSizeBinaryArray))
}
Expand Down
100 changes: 52 additions & 48 deletions datafusion/functions-nested/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ use arrow::array::{
};
use arrow::datatypes::{DataType, Field};

use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result};
use datafusion_common::utils::ListCoercion;
use datafusion_common::{not_impl_err, DataFusionError, Result};

use std::any::Any;

Expand All @@ -39,12 +40,15 @@ use arrow::compute::cast;
use arrow::datatypes::DataType::{
Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8, Utf8View,
};
use datafusion_common::cast::{as_large_list_array, as_list_array};
use datafusion_common::cast::{
as_fixed_size_list_array, as_large_list_array, as_list_array,
};
use datafusion_common::exec_err;
use datafusion_common::types::logical_string;
use datafusion_expr::{
Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature,
TypeSignatureClass, Volatility,
ArrayFunctionArgument, ArrayFunctionSignature, Coercion, ColumnarValue,
Documentation, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass,
Volatility,
};
use datafusion_functions::downcast_arg;
use datafusion_macros::user_doc;
Expand Down Expand Up @@ -159,7 +163,26 @@ impl Default for ArrayToString {
impl ArrayToString {
pub fn new() -> Self {
Self {
signature: Signature::variadic_any(Volatility::Immutable),
signature: Signature::one_of(
vec![
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::String,
ArrayFunctionArgument::String,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
}),
TypeSignature::ArraySignature(ArrayFunctionSignature::Array {
arguments: vec![
ArrayFunctionArgument::Array,
ArrayFunctionArgument::String,
],
array_coercion: Some(ListCoercion::FixedSizedListToList),
}),
],
Volatility::Immutable,
),
aliases: vec![
String::from("list_to_string"),
String::from("array_join"),
Expand All @@ -182,13 +205,8 @@ impl ScalarUDFImpl for ArrayToString {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(match arg_types[0] {
List(_) | LargeList(_) | FixedSizeList(_, _) => Utf8,
_ => {
return plan_err!("The array_to_string function can only accept List/LargeList/FixedSizeList.");
}
})
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(Utf8)
Comment on lines -185 to +209
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't need to guard here since type signature should enforce for us now

}

fn invoke_with_args(
Expand Down Expand Up @@ -282,16 +300,10 @@ impl ScalarUDFImpl for StringToArray {
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
Ok(match arg_types[0] {
Utf8 | Utf8View | LargeUtf8 => {
List(Arc::new(Field::new_list_field(arg_types[0].clone(), true)))
}
_ => {
return plan_err!(
"The string_to_array function can only accept Utf8, Utf8View or LargeUtf8."
);
}
})
Ok(List(Arc::new(Field::new_list_field(
arg_types[0].clone(),
true,
))))
Comment on lines -285 to +306
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for string to array, but is similar to above in that type signature already ensures the arguments are string

}

fn invoke_with_args(
Expand Down Expand Up @@ -368,6 +380,20 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {

Ok(arg)
}
FixedSizeList(..) => {
let list_array = as_fixed_size_list_array(&arr)?;
for i in 0..list_array.len() {
compute_array_to_string(
arg,
list_array.value(i),
delimiter.clone(),
null_string.clone(),
with_null_string,
)?;
}

Ok(arg)
}
Comment on lines +383 to +396
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is handling for when an element of the input list is a FixedSizeList itself

LargeList(..) => {
let list_array = as_large_list_array(&arr)?;
for i in 0..list_array.len() {
Expand Down Expand Up @@ -449,9 +475,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(StringArray::from(res))
}

let arr_type = arr.data_type();
let string_arr = match arr_type {
List(_) | FixedSizeList(_, _) => {
let string_arr = match arr.data_type() {
List(_) => {
let list_array = as_list_array(&arr)?;
generate_string_array::<i32>(
list_array,
Expand All @@ -469,29 +494,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
with_null_string,
)?
}
_ => {
let mut arg = String::from("");
let mut res: Vec<Option<String>> = Vec::new();
// delimiter length is 1
assert_eq!(delimiters.len(), 1);
let delimiter = delimiters[0].unwrap();
let s = compute_array_to_string(
&mut arg,
Arc::clone(arr),
delimiter.to_string(),
null_string,
with_null_string,
)?
.clone();

if !s.is_empty() {
let s = s.strip_suffix(delimiter).unwrap().to_string();
res.push(Some(s));
} else {
res.push(Some(s));
}
StringArray::from(res)
}
Comment on lines -472 to -494
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this arm as the array (which is the first argument) must be a List type (List, Fixed or Large) by this point so this arm didn't make sense; even before my changes I don't think this arm was ever reachable as the return_type() function guarded arg[0] to be a list type 🤔

// Signature guards against this arm
_ => return exec_err!("array_to_string expects list as first argument"),
};

Ok(Arc::new(string_arr))
Expand Down
15 changes: 15 additions & 0 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4703,6 +4703,11 @@ select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'),
----
h,e,l,l,o 1-2-3-4-5 1|2|3

query TTT
select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|');
----
h,e,l,l,o 1-2-3-4-5 1|2|3

# array_to_string scalar function with nulls #2
query TTT
select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
Expand All @@ -4714,6 +4719,16 @@ select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'Large
----
h,-,-,-,o nil-2-nil-4-5 1|0|3

query TTT
select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0');
----
h,-,-,-,o nil-2-nil-4-5 1|0|3

query T
select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-');
----
-,a,-,-
Comment on lines +4727 to +4730
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting; it's formatting [[NULL, 'a'], NULL] to become this; note three - implying three nulls. I'm not sure if this is desired or if it should instead be -,a,- since the second element of the parent array is null (current code considers it to be two nulls, I guess because it represents a FixedSizeList(2)) 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll look into this as a follow-on issue

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raised #17692


# array_to_string with columns #1

# For reference
Expand Down
Loading