Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: write hive partitions for any int/uint/float #15337

Merged
merged 1 commit into from
Mar 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 58 additions & 2 deletions datafusion/datasource/src/write/demux.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,8 +33,10 @@ use arrow::array::{
};
use arrow::datatypes::{DataType, Schema};
use datafusion_common::cast::{
as_boolean_array, as_date32_array, as_date64_array, as_int32_array, as_int64_array,
as_string_array, as_string_view_array,
as_boolean_array, as_date32_array, as_date64_array, as_float16_array,
as_float32_array, as_float64_array, as_int16_array, as_int32_array, as_int64_array,
as_int8_array, as_string_array, as_string_view_array, as_uint16_array,
as_uint32_array, as_uint64_array, as_uint8_array,
};
use datafusion_common::{exec_datafusion_err, not_impl_err, DataFusionError};
use datafusion_common_runtime::SpawnedTask;
Expand Down Expand Up @@ -407,6 +409,18 @@ fn compute_partition_keys_by_row<'a>(
partition_values.push(Cow::from(date));
}
}
DataType::Int8 => {
let array = as_int8_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::Int16 => {
let array = as_int16_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::Int32 => {
let array = as_int32_array(col_array)?;
for i in 0..rb.num_rows() {
Expand All @@ -419,6 +433,48 @@ fn compute_partition_keys_by_row<'a>(
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::UInt8 => {
let array = as_uint8_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::UInt16 => {
let array = as_uint16_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::UInt32 => {
let array = as_uint32_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::UInt64 => {
let array = as_uint64_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::Float16 => {
let array = as_float16_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::Float32 => {
let array = as_float32_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::Float64 => {
let array = as_float64_array(col_array)?;
for i in 0..rb.num_rows() {
partition_values.push(Cow::from(array.value(i).to_string()));
}
}
DataType::Dictionary(_, _) => {
downcast_dictionary_array!(
col_array => {
Expand Down
34 changes: 23 additions & 11 deletions datafusion/sqllogictest/test_files/copy.slt
Original file line number | Diff line number | Diff line change
Expand Up @@ -110,24 +110,36 @@ a

# Copy to directory as partitioned files
query I
COPY (values (1::int, 2::bigint, 19968::date, arrow_cast(1725235200000, 'Date64'), false, 'x'),
(11::int, 22::bigint, 19969::date, arrow_cast(1725148800000, 'Date64'), true, 'y')
COPY (values (arrow_cast(1, 'Int8'), arrow_cast(2, 'UInt8'), arrow_cast(3, 'Int16'), arrow_cast(4, 'UInt16'),
arrow_cast(5, 'Int32'), arrow_cast(6, 'UInt32'), arrow_cast(7, 'Int64'), arrow_cast(8, 'UInt64'),
arrow_cast(9.1015625, 'Float16'), arrow_cast(10.1, 'Float32'), arrow_cast(11.1, 'Float64'), 19968::date,
arrow_cast(1725235200000, 'Date64'), false, 'x'),
(arrow_cast(11, 'Int8'), arrow_cast(22, 'UInt8'), arrow_cast(33, 'Int16'), arrow_cast(44, 'UInt16'),
arrow_cast(55, 'Int32'), arrow_cast(66, 'UInt32'), arrow_cast(77, 'Int64'), arrow_cast(88, 'UInt64'),
arrow_cast(9.203125, 'Float16'), arrow_cast(10.2, 'Float32'), arrow_cast(11.2, 'Float64'), 19969::date,
arrow_cast(1725148800000, 'Date64'), true, 'y')
)
TO 'test_files/scratch/copy/partitioned_table5/' STORED AS parquet PARTITIONED BY (column1, column2, column3, column4, column5)
TO 'test_files/scratch/copy/partitioned_table5/' STORED AS parquet PARTITIONED BY (column1, column2, column3, column4,
column5, column6, column7, column8, column9, column10, column11, column12, column13, column14)
OPTIONS ('format.compression' 'zstd(10)');
----
2

# validate partitioning
statement ok
CREATE EXTERNAL TABLE validate_partitioned_parquet5 (column1 int, column2 bigint, column3 date, column4 date, column5 boolean, column6 varchar) STORED AS PARQUET
LOCATION 'test_files/scratch/copy/partitioned_table5/' PARTITIONED BY (column1, column2, column3, column4, column5);

query IIDDBT
select column1, column2, column3, column4, column5, column6 from validate_partitioned_parquet5 order by column1,column2,column3,column4,column5;
----
1 2 2024-09-02 2024-09-02 false x
11 22 2024-09-03 2024-09-01 true y
CREATE EXTERNAL TABLE validate_partitioned_parquet5 (column1 int, column2 int, column3 int, column4 int, column5 int,
column6 int, column7 bigint, column8 bigint, column9 float, column10 float, column11 float, column12 date,
column13 date, column14 boolean, column15 varchar) STORED AS PARQUET
LOCATION 'test_files/scratch/copy/partitioned_table5/' PARTITIONED BY (column1, column2, column3, column4, column5,
column6, column7, column8, column9, column10, column11, column12, column13, column14);

query IIIIIIIIRRRDDBT
select column1, column2, column3, column4, column5, column6, column7, column8, column9, column10, column11, column12,
column13, column14, column15 from validate_partitioned_parquet5 order by column1, column2, column3, column4,
column5, column6, column7, column8, column9, column10, column11, column12, column13;
----
1 2 3 4 5 6 7 8 9.1015625 10.1 11.1 2024-09-02 2024-09-02 false x
11 22 33 44 55 66 77 88 9.203125 10.2 11.2 2024-09-03 2024-09-01 true y


statement ok
Expand Down