-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Support FixedSizeList for array_to_string #17666
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,7 +25,8 @@ use arrow::array::{ | |
}; | ||
use arrow::datatypes::{DataType, Field}; | ||
|
||
use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; | ||
use datafusion_common::utils::ListCoercion; | ||
use datafusion_common::{not_impl_err, DataFusionError, Result}; | ||
|
||
use std::any::Any; | ||
|
||
|
@@ -39,12 +40,15 @@ use arrow::compute::cast; | |
use arrow::datatypes::DataType::{ | ||
Dictionary, FixedSizeList, LargeList, LargeUtf8, List, Null, Utf8, Utf8View, | ||
}; | ||
use datafusion_common::cast::{as_large_list_array, as_list_array}; | ||
use datafusion_common::cast::{ | ||
as_fixed_size_list_array, as_large_list_array, as_list_array, | ||
}; | ||
use datafusion_common::exec_err; | ||
use datafusion_common::types::logical_string; | ||
use datafusion_expr::{ | ||
Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, | ||
TypeSignatureClass, Volatility, | ||
ArrayFunctionArgument, ArrayFunctionSignature, Coercion, ColumnarValue, | ||
Documentation, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, | ||
Volatility, | ||
}; | ||
use datafusion_functions::downcast_arg; | ||
use datafusion_macros::user_doc; | ||
|
@@ -159,7 +163,26 @@ impl Default for ArrayToString { | |
impl ArrayToString { | ||
pub fn new() -> Self { | ||
Self { | ||
signature: Signature::variadic_any(Volatility::Immutable), | ||
signature: Signature::one_of( | ||
vec![ | ||
TypeSignature::ArraySignature(ArrayFunctionSignature::Array { | ||
arguments: vec![ | ||
ArrayFunctionArgument::Array, | ||
ArrayFunctionArgument::String, | ||
ArrayFunctionArgument::String, | ||
], | ||
array_coercion: Some(ListCoercion::FixedSizedListToList), | ||
}), | ||
TypeSignature::ArraySignature(ArrayFunctionSignature::Array { | ||
arguments: vec![ | ||
ArrayFunctionArgument::Array, | ||
ArrayFunctionArgument::String, | ||
], | ||
array_coercion: Some(ListCoercion::FixedSizedListToList), | ||
}), | ||
], | ||
Volatility::Immutable, | ||
), | ||
aliases: vec![ | ||
String::from("list_to_string"), | ||
String::from("array_join"), | ||
|
@@ -182,13 +205,8 @@ impl ScalarUDFImpl for ArrayToString { | |
&self.signature | ||
} | ||
|
||
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { | ||
Ok(match arg_types[0] { | ||
List(_) | LargeList(_) | FixedSizeList(_, _) => Utf8, | ||
_ => { | ||
return plan_err!("The array_to_string function can only accept List/LargeList/FixedSizeList."); | ||
} | ||
}) | ||
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { | ||
Ok(Utf8) | ||
} | ||
|
||
fn invoke_with_args( | ||
|
@@ -282,16 +300,10 @@ impl ScalarUDFImpl for StringToArray { | |
} | ||
|
||
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { | ||
Ok(match arg_types[0] { | ||
Utf8 | Utf8View | LargeUtf8 => { | ||
List(Arc::new(Field::new_list_field(arg_types[0].clone(), true))) | ||
} | ||
_ => { | ||
return plan_err!( | ||
"The string_to_array function can only accept Utf8, Utf8View or LargeUtf8." | ||
); | ||
} | ||
}) | ||
Ok(List(Arc::new(Field::new_list_field( | ||
arg_types[0].clone(), | ||
true, | ||
)))) | ||
Comment on lines
-285
to
+306
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is for string to array, but is similar to above in that type signature already ensures the arguments are string |
||
} | ||
|
||
fn invoke_with_args( | ||
|
@@ -368,6 +380,20 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> { | |
|
||
Ok(arg) | ||
} | ||
FixedSizeList(..) => { | ||
let list_array = as_fixed_size_list_array(&arr)?; | ||
for i in 0..list_array.len() { | ||
compute_array_to_string( | ||
arg, | ||
list_array.value(i), | ||
delimiter.clone(), | ||
null_string.clone(), | ||
with_null_string, | ||
)?; | ||
} | ||
|
||
Ok(arg) | ||
} | ||
Comment on lines
+383
to
+396
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is handling for when an element of the input list is a |
||
LargeList(..) => { | ||
let list_array = as_large_list_array(&arr)?; | ||
for i in 0..list_array.len() { | ||
|
@@ -449,9 +475,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> { | |
Ok(StringArray::from(res)) | ||
} | ||
|
||
let arr_type = arr.data_type(); | ||
let string_arr = match arr_type { | ||
List(_) | FixedSizeList(_, _) => { | ||
let string_arr = match arr.data_type() { | ||
List(_) => { | ||
let list_array = as_list_array(&arr)?; | ||
generate_string_array::<i32>( | ||
list_array, | ||
|
@@ -469,29 +494,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result<ArrayRef> { | |
with_null_string, | ||
)? | ||
} | ||
_ => { | ||
let mut arg = String::from(""); | ||
let mut res: Vec<Option<String>> = Vec::new(); | ||
// delimiter length is 1 | ||
assert_eq!(delimiters.len(), 1); | ||
let delimiter = delimiters[0].unwrap(); | ||
let s = compute_array_to_string( | ||
&mut arg, | ||
Arc::clone(arr), | ||
delimiter.to_string(), | ||
null_string, | ||
with_null_string, | ||
)? | ||
.clone(); | ||
|
||
if !s.is_empty() { | ||
let s = s.strip_suffix(delimiter).unwrap().to_string(); | ||
res.push(Some(s)); | ||
} else { | ||
res.push(Some(s)); | ||
} | ||
StringArray::from(res) | ||
} | ||
Comment on lines
-472
to
-494
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed this arm as the array (which is the first argument) must be a List type (List, Fixed or Large) by this point so this arm didn't make sense; even before my changes I don't think this arm was ever reachable as the |
||
// Signature guards against this arm | ||
_ => return exec_err!("array_to_string expects list as first argument"), | ||
}; | ||
|
||
Ok(Arc::new(string_arr)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4703,6 +4703,11 @@ select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), | |
---- | ||
h,e,l,l,o 1-2-3-4-5 1|2|3 | ||
|
||
query TTT | ||
select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'FixedSizeList(5, Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'FixedSizeList(3, Float64)'), '|'); | ||
---- | ||
h,e,l,l,o 1-2-3-4-5 1|2|3 | ||
|
||
# array_to_string scalar function with nulls #2 | ||
query TTT | ||
select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0'); | ||
|
@@ -4714,6 +4719,16 @@ select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'Large | |
---- | ||
h,-,-,-,o nil-2-nil-4-5 1|0|3 | ||
|
||
query TTT | ||
select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'FixedSizeList(5, Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'FixedSizeList(5, Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'FixedSizeList(3, Float64)'), '|', '0'); | ||
---- | ||
h,-,-,-,o nil-2-nil-4-5 1|0|3 | ||
|
||
query T | ||
select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); | ||
---- | ||
-,a,-,- | ||
Comment on lines
+4727
to
+4730
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is interesting; it's formatting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll look into this as a follow-on issue There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Raised #17692 |
||
|
||
# array_to_string with columns #1 | ||
|
||
# For reference | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to guard here since type signature should enforce for us now