Skip to content

Commit c929a1c

Browse files
cht42 and alamb authored
ArraySort: support structs (apache#15527)
* ArraySort: support structs
* fix
* fix
* fix
* Update datafusion/functions-nested/src/sort.rs

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 7317198 commit c929a1c

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

datafusion/functions-nested/src/sort.rs

+18 -2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use crate::utils::make_scalar_function;
2121
use arrow::array::{new_null_array, Array, ArrayRef, ListArray, NullBufferBuilder};
2222
use arrow::buffer::OffsetBuffer;
23+
use arrow::compute::SortColumn;
2324
use arrow::datatypes::DataType::{FixedSizeList, LargeList, List};
2425
use arrow::datatypes::{DataType, Field};
2526
use arrow::{compute, compute::SortOptions};
@@ -207,9 +208,24 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
207208
valid.append_null();
208209
} else {
209210
let arr_ref = list_array.value(i);
210-
let arr_ref = arr_ref.as_ref();
211211

212-
let sorted_array = compute::sort(arr_ref, sort_option)?;
212+
// arrow sort kernel does not support Structs, so use
213+
// lexsort_to_indices instead:
214+
// https://github.com/apache/arrow-rs/issues/6911#issuecomment-2562928843
215+
let sorted_array = match arr_ref.data_type() {
216+
DataType::Struct(_) => {
217+
let sort_columns: Vec<SortColumn> = vec![SortColumn {
218+
values: Arc::clone(&arr_ref),
219+
options: sort_option,
220+
}];
221+
let indices = compute::lexsort_to_indices(&sort_columns, None)?;
222+
compute::take(arr_ref.as_ref(), &indices, None)?
223+
}
224+
_ => {
225+
let arr_ref = arr_ref.as_ref();
226+
compute::sort(arr_ref, sort_option)?
227+
}
228+
};
213229
array_lengths.push(sorted_array.len());
214230
arrays.push(sorted_array);
215231
valid.append_non_null();

datafusion/sqllogictest/test_files/array.slt

+5
Original file line numberDiff line numberDiff line change
@@ -2396,6 +2396,11 @@ NULL NULL
23962396
NULL NULL
23972397
NULL NULL
23982398

2399+
query ?
2400+
select array_sort([struct('foo', 3), struct('foo', 1), struct('bar', 1)])
2401+
----
2402+
[{c0: bar, c1: 1}, {c0: foo, c1: 1}, {c0: foo, c1: 3}]
2403+
23992404
## test with argument of incorrect types
24002405
query error DataFusion error: Execution error: the second parameter of array_sort expects DESC or ASC
24012406
select array_sort([1, 3, null, 5, NULL, -5], 1), array_sort([1, 3, null, 5, NULL, -5], 'DESC', 1), array_sort([1, 3, null, 5, NULL, -5], 1, 1);

0 commit comments

Comments
 (0)