
Commit 885a49c

Merge branch 'main' into insta_datasource
2 parents 35ae483 + fc93374 commit 885a49c

File tree: 25 files changed, +943 -540 lines

Cargo.lock (+2): generated file, diff not rendered by default.

datafusion-cli/src/main.rs (+23, -19)

@@ -322,7 +322,8 @@ fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use datafusion::assert_batches_eq;
+    use datafusion::common::test_util::batches_to_string;
+    use insta::assert_snapshot;
 
     fn assert_conversion(input: &str, expected: Result<usize, String>) {
         let result = extract_memory_pool_size(input);
@@ -391,21 +392,26 @@ mod tests {
         let df = ctx.sql(sql).await?;
         let rbs = df.collect().await?;
 
-        let excepted = [
-            "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+",
-            "| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |",
-            "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+",
-            "| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | \"f0.list.item\" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |",
-            "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+",
-        ];
-        assert_batches_eq!(excepted, &rbs);
+        assert_snapshot!(batches_to_string(&rbs), @r#"
+        +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |
+        +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |
+        +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        "#);
 
         // input with double quote
         let sql =
             "SELECT * FROM parquet_metadata(\"../datafusion/core/tests/data/fixed_size_list_array.parquet\")";
         let df = ctx.sql(sql).await?;
         let rbs = df.collect().await?;
-        assert_batches_eq!(excepted, &rbs);
+        assert_snapshot!(batches_to_string(&rbs), @r#"
+        +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |
+        +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |
+        +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        "#);
 
         Ok(())
     }
@@ -421,15 +427,13 @@ mod tests {
         let df = ctx.sql(sql).await?;
         let rbs = df.collect().await?;
 
-        let excepted = [
-
-            "+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+",
-            "| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |",
-            "+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+",
-            "| ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | \"String\" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 |",
-            "+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+"
-        ];
-        assert_batches_eq!(excepted, &rbs);
+        assert_snapshot!(batches_to_string(&rbs),@r#"
+        +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |
+        +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        | ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 |
+        +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+
+        "#);
 
         Ok(())
     }
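The pattern above repeats throughout the commit: a hand-maintained expected-strings array compared with `assert_batches_eq!` becomes an `insta` inline snapshot over `batches_to_string`. A minimal sketch of that pattern, assuming the same `arrow`, `datafusion_common`, and `insta` dev-dependencies used in this repo; the column name and values are illustrative, not taken from the real tests:

```rust
use std::sync::Arc;

use arrow::array::Int32Array;
use arrow::record_batch::RecordBatch;
use datafusion_common::test_util::batches_to_string;
use insta::assert_snapshot;

#[test]
fn snapshot_batches_example() {
    // Build a tiny single-column batch in place of the query results above.
    let batch = RecordBatch::try_from_iter(vec![(
        "a",
        Arc::new(Int32Array::from(vec![1, 2])) as _,
    )])
    .unwrap();

    // Render the batch with arrow's pretty printer and compare it against an
    // inline snapshot; the text after `@` is maintained by cargo-insta.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +---+
    | a |
    +---+
    | 1 |
    | 2 |
    +---+
    ");
}
```

The practical gain is that `cargo insta review` (or `cargo insta accept`) rewrites the inline text in place, which is what keeps the very wide `parquet_metadata` snapshots above maintainable.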

datafusion/common/Cargo.toml (+1)

@@ -73,4 +73,5 @@ web-time = "1.1.0"
 
 [dev-dependencies]
 chrono = { workspace = true }
+insta = { workspace = true }
 rand = { workspace = true }

datafusion/common/src/scalar/mod.rs (+34, -39)

@@ -38,6 +38,7 @@ use crate::cast::{
     as_fixed_size_binary_array, as_fixed_size_list_array,
 };
 use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
+use crate::format::DEFAULT_CAST_OPTIONS;
 use crate::hash_utils::create_hashes;
 use crate::utils::SingleRowListArrayBuilder;
 use arrow::array::{
@@ -58,8 +59,6 @@ use arrow::datatypes::{
     UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION,
 };
 use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
-
-use crate::format::DEFAULT_CAST_OPTIONS;
 use half::f16;
 pub use struct_builder::ScalarStructBuilder;
 
@@ -3976,14 +3975,15 @@ mod tests {
         as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array,
     };
 
-    use crate::assert_batches_eq;
+    use crate::test_util::batches_to_string;
     use arrow::array::{types::Float64Type, NullBufferBuilder};
    use arrow::buffer::{Buffer, OffsetBuffer};
     use arrow::compute::{is_null, kernels};
     use arrow::datatypes::Fields;
     use arrow::error::ArrowError;
     use arrow::util::pretty::pretty_format_columns;
     use chrono::NaiveDate;
+    use insta::assert_snapshot;
     use rand::Rng;
 
     #[test]
@@ -6910,14 +6910,13 @@ mod tests {
 
         //verify compared to arrow display
         let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
-        let expected = [
-            "+-------------+",
-            "| s           |",
-            "+-------------+",
-            "| {a: 1, b: } |",
-            "+-------------+",
-        ];
-        assert_batches_eq!(&expected, &[batch]);
+        assert_snapshot!(batches_to_string(&[batch]), @r"
+        +-------------+
+        | s           |
+        +-------------+
+        | {a: 1, b: } |
+        +-------------+
+        ");
     }
 
     #[test]
@@ -6946,14 +6945,13 @@ mod tests {
 
         //verify compared to arrow display
         let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
-        let expected = [
-            "+--------------+",
-            "| s            |",
-            "+--------------+",
-            "| {a: 1, b: 2} |",
-            "+--------------+",
-        ];
-        assert_batches_eq!(&expected, &[batch]);
+        assert_snapshot!(batches_to_string(&[batch]), @r"
+        +--------------+
+        | s            |
+        +--------------+
+        | {a: 1, b: 2} |
+        +--------------+
+        ");
     }
 
     #[test]
@@ -6969,15 +6967,13 @@ mod tests {
         //verify compared to arrow display
         let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap();
 
-        #[rustfmt::skip]
-        let expected = [
-            "+---+",
-            "| s |",
-            "+---+",
-            "|   |",
-            "+---+",
-        ];
-        assert_batches_eq!(&expected, &[batch]);
+        assert_snapshot!(batches_to_string(&[batch]), @r"
+        +---+
+        | s |
+        +---+
+        |   |
+        +---+
+        ");
     }
 
     #[test]
@@ -7011,17 +7007,16 @@ mod tests {
 
         //verify compared to arrow display
        let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap();
-        let expected = [
-            "+--------------------+",
-            "| m                  |",
-            "+--------------------+",
-            "| {joe: 1}           |",
-            "| {blogs: 2, foo: 4} |",
-            "| {}                 |",
-            "|                    |",
-            "+--------------------+",
-        ];
-        assert_batches_eq!(&expected, &[batch]);
+        assert_snapshot!(batches_to_string(&[batch]), @r"
+        +--------------------+
+        | m                  |
+        +--------------------+
+        | {joe: 1}           |
+        | {blogs: 2, foo: 4} |
+        | {}                 |
+        |                    |
+        +--------------------+
+        ");
     }
 
     #[test]
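The struct and map display tests translate mechanically into this style. A hedged sketch of the same idea as a standalone test, assuming a recent arrow-rs where `StructArray: From<Vec<(FieldRef, ArrayRef)>>`; the field names and values are illustrative, not the fixtures used above:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, StructArray};
use arrow::datatypes::{DataType, Field};
use arrow::record_batch::RecordBatch;
use datafusion_common::test_util::batches_to_string;
use insta::assert_snapshot;

#[test]
fn snapshot_struct_column() {
    // One-row struct column {a: 1, b: 2}, mirroring the shape displayed above.
    let s = StructArray::from(vec![
        (
            Arc::new(Field::new("a", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![1])) as ArrayRef,
        ),
        (
            Arc::new(Field::new("b", DataType::Int32, false)),
            Arc::new(Int32Array::from(vec![2])) as ArrayRef,
        ),
    ]);
    let batch =
        RecordBatch::try_from_iter(vec![("s", Arc::new(s) as ArrayRef)]).unwrap();

    // The snapshot is the pretty-printed batch, matching the tables in the diff above.
    assert_snapshot!(batches_to_string(&[batch]), @r"
    +--------------+
    | s            |
    +--------------+
    | {a: 1, b: 2} |
    +--------------+
    ");
}
```

Map columns follow the same route, rendering each row as `{key: value, ...}` exactly as in the converted test above.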

datafusion/core/src/datasource/physical_plan/parquet.rs (+1, -1)

@@ -277,7 +277,7 @@ mod tests {
         | 1  |    |
         +----+----+
         "###);
-
+
         let metrics = rt.parquet_exec.metrics().unwrap();
         let metric = get_value(&metrics, "pushdown_rows_pruned");
         assert_eq!(metric, 2, "Expected all rows to be pruned");

datafusion/core/src/execution/context/csv.rs (+9, -9)

@@ -89,8 +89,9 @@ impl SessionContext {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::assert_batches_eq;
     use crate::test_util::{plan_and_collect, populate_csv_partitions};
+    use datafusion_common::test_util::batches_to_string;
+    use insta::assert_snapshot;
 
     use tempfile::TempDir;
 
@@ -115,14 +116,13 @@ mod tests {
             plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), count(*) FROM test").await?;
 
         assert_eq!(results.len(), 1);
-        let expected = [
-            "+--------------+--------------+----------+",
-            "| sum(test.c1) | sum(test.c2) | count(*) |",
-            "+--------------+--------------+----------+",
-            "| 10           | 110          | 20       |",
-            "+--------------+--------------+----------+",
-        ];
-        assert_batches_eq!(expected, &results);
+        assert_snapshot!(batches_to_string(&results), @r"
+        +--------------+--------------+----------+
+        | sum(test.c1) | sum(test.c2) | count(*) |
+        +--------------+--------------+----------+
+        | 10           | 110          | 20       |
+        +--------------+--------------+----------+
+        ");
 
         Ok(())
     }
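The same snapshot style also works end to end against the public `SessionContext` API. A hedged sketch written against the external `datafusion` crate rather than the crate-internal `plan_and_collect`/`populate_csv_partitions` helpers used above; the CSV path, the count, and the snapshot text are placeholders, and a `tokio` dev-dependency is assumed for the async test:

```rust
use datafusion::error::Result;
use datafusion::prelude::{CsvReadOptions, SessionContext};
use datafusion_common::test_util::batches_to_string;
use insta::assert_snapshot;

#[tokio::test]
async fn snapshot_csv_query() -> Result<()> {
    let ctx = SessionContext::new();
    // Hypothetical fixture path; substitute any small CSV file with a header row.
    ctx.register_csv("test", "tests/data/example.csv", CsvReadOptions::new())
        .await?;

    let results = ctx.sql("SELECT count(*) FROM test").await?.collect().await?;

    // Illustrative snapshot: the count depends on the CSV used, and
    // `cargo insta review` would write the real table here on first run.
    assert_snapshot!(batches_to_string(&results), @r"
    +----------+
    | count(*) |
    +----------+
    | 3        |
    +----------+
    ");
    Ok(())
}
```

Running `cargo insta test` followed by `cargo insta review` is the usual workflow for accepting snapshot changes when query output legitimately shifts.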
