From 71b97de5e9f188f946e653a5c8d78ed9b5da270a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 18 Dec 2022 07:18:36 -0500 Subject: [PATCH 1/3] Add `--complete` auto completion mode to `sqllogictests` --- datafusion/core/Cargo.toml | 2 +- datafusion/core/tests/sql/aggregates.rs | 34 ----- datafusion/core/tests/sqllogictests/README.md | 17 ++- .../core/tests/sqllogictests/src/error.rs | 9 ++ .../core/tests/sqllogictests/src/main.rs | 142 +++++++++++------- .../sqllogictests/test_files/aggregate.slt | 14 ++ .../tests/sqllogictests/test_files/ddl.slt | 6 + 7 files changed, 131 insertions(+), 93 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 898237a83589..18f5901689bd 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -109,7 +109,7 @@ doc-comment = "0.3" env_logger = "0.10" parquet-test-utils = { path = "../../parquet-test-utils" } rstest = "0.16.0" -sqllogictest = "0.9.0" +sqllogictest = "0.10.0" sqlparser = "0.28" test-utils = { path = "../../test-utils" } thiserror = "1.0.37" diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 2dd3a8dec8d3..dcd30909a935 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -1079,37 +1079,3 @@ async fn aggregate_with_alias() -> Result<()> { ); Ok(()) } - -#[tokio::test] -async fn array_agg_zero() -> Result<()> { - let ctx = SessionContext::new(); - // 2 different aggregate functions: avg and sum(distinct) - let sql = "SELECT ARRAY_AGG([]);"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+------------------------+", - "| ARRAYAGG(List([NULL])) |", - "+------------------------+", - "| [] |", - "+------------------------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} - -#[tokio::test] -async fn array_agg_one() -> Result<()> { - let ctx = SessionContext::new(); - // 2 different aggregate functions: avg and sum(distinct) - let sql = "SELECT ARRAY_AGG([1]);"; - let actual = execute_to_batches(&ctx, sql).await; - let expected = vec![ - "+---------------------+", - "| ARRAYAGG(List([1])) |", - "+---------------------+", - "| [[1]] |", - "+---------------------+", - ]; - assert_batches_eq!(expected, &actual); - Ok(()) -} diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md index 648e0a3eaacb..15a4e2834193 100644 --- a/datafusion/core/tests/sqllogictests/README.md +++ b/datafusion/core/tests/sqllogictests/README.md @@ -21,7 +21,11 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files in `test_files`. -#### Running tests +#### Running tests: Validation Mode + +In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the fule to the output produced by that run. + +Run all tests suites in validation mode ```shell cargo test -p datafusion --test sqllogictests @@ -40,6 +44,17 @@ Run only the tests in `information_schema.slt`: cargo test -p datafusion --test sqllogictests -- information ``` +#### Updating tests: Completion Mode + +In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is is a full script that is a copy of the prototype script with result inserted. + +You can update tests by passing the `--complete` argument. + +```shell +# Update ddl.slt with output from running +cargo test -p datafusion --test sqllogictests -- ddl +``` + #### sqllogictests > :warning: **Warning**:Datafusion's sqllogictest implementation and migration is still in progress. Definitions taken from https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki diff --git a/datafusion/core/tests/sqllogictests/src/error.rs b/datafusion/core/tests/sqllogictests/src/error.rs index d645e054b185..a7f252fe71ea 100644 --- a/datafusion/core/tests/sqllogictests/src/error.rs +++ b/datafusion/core/tests/sqllogictests/src/error.rs @@ -38,6 +38,9 @@ pub enum DFSqlLogicTestError { /// Error from arrow-rs #[error("Arrow error: {0}")] Arrow(ArrowError), + /// Generic error + #[error("Other Error: {0}")] + Other(String), } impl From for DFSqlLogicTestError { @@ -63,3 +66,9 @@ impl From for DFSqlLogicTestError { DFSqlLogicTestError::Arrow(value) } } + +impl From for DFSqlLogicTestError { + fn from(value: String) -> Self { + DFSqlLogicTestError::Other(value) + } +} diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index e8c44babe98f..9a4d5bbac6bb 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -23,7 +23,7 @@ use log::info; use normalize::convert_batches; use sqllogictest::DBOutput; use sqlparser::ast::Statement as SQLStatement; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::time::Duration; use crate::error::{DFSqlLogicTestError, Result}; @@ -78,16 +78,19 @@ pub async fn main() -> Result<()> { #[cfg(not(target_family = "windows"))] pub async fn main() -> Result<()> { // Enable logging (e.g. set RUST_LOG=debug to see debug logs) + + use sqllogictest::{default_validator, update_test_file}; env_logger::init(); - // run each file using its own new DB - // - // Note: can't use tester.run_parallel_async() - // as that will reuse the same SessionContext - // - // We could run these tests in parallel eventually if we wanted. + let options = Options::new(); + + // default to all files in test directory filtering based on name + let files: Vec<_> = std::fs::read_dir(TEST_DIRECTORY) + .unwrap() + .map(|path| path.unwrap().path()) + .filter(|path| options.check_test_file(path.as_path())) + .collect(); - let files = get_test_files(); info!("Running test files {:?}", files); for path in files { @@ -95,61 +98,29 @@ pub async fn main() -> Result<()> { let file_name = path.file_name().unwrap().to_str().unwrap().to_string(); + // Create the test runner let ctx = context_for_test_file(&file_name).await; - - let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name }); - tester.run_file_async(path).await?; + let mut runner = sqllogictest::Runner::new(DataFusion { ctx, file_name }); + + // run each file using its own new DB + // + // We could run these tests in parallel eventually if we wanted. + if options.complete_mode { + info!("Using complete mode to complete {}", path.display()); + let col_separator = " "; + let validator = default_validator; + update_test_file(path, runner, col_separator, validator) + .await + .map_err(|e| e.to_string())?; + } else { + // run the test normally: + runner.run_file_async(path).await?; + } } Ok(()) } -/// Gets a list of test files to execute. If there were arguments -/// passed to the program treat it as a cargo test filter (substring match on filenames) -fn get_test_files() -> Vec { - info!("Test directory: {}", TEST_DIRECTORY); - - let args: Vec<_> = std::env::args().collect(); - - // treat args after the first as filters to run (substring matching) - let filters = if !args.is_empty() { - args.iter() - .skip(1) - .map(|arg| arg.as_str()) - .collect::>() - } else { - vec![] - }; - - // default to all files in test directory filtering based on name - std::fs::read_dir(TEST_DIRECTORY) - .unwrap() - .map(|path| path.unwrap().path()) - .filter(|path| check_test_file(&filters, path.as_path())) - .collect() -} - -/// because this test can be run as a cargo test, commands like -/// -/// ```shell -/// cargo test foo -/// ``` -/// -/// Will end up passing `foo` as a command line argument. -/// -/// be compatible with this, treat the command line arguments as a -/// filter and that does a substring match on each input. -/// returns true f this path should be run -fn check_test_file(filters: &[&str], path: &Path) -> bool { - if filters.is_empty() { - return true; - } - - // otherwise check if any filter matches - let path_str = path.to_string_lossy(); - filters.iter().any(|filter| path_str.contains(filter)) -} - /// Create a SessionContext, configured for the specific test async fn context_for_test_file(file_name: &str) -> SessionContext { match file_name { @@ -189,3 +160,60 @@ async fn run_query(ctx: &SessionContext, sql: impl Into) -> Result, + + /// Auto complete mode to fill out expected results + complete_mode: bool, +} + +impl Options { + fn new() -> Self { + let args: Vec<_> = std::env::args().collect(); + + let complete_mode = args.iter().any(|a| a == "--complete"); + + // treat args after the first as filters to run (substring matching) + let filters = if !args.is_empty() { + args.into_iter() + .skip(1) + // ignore command line arguments like `--complete` + .filter(|arg| !arg.as_str().starts_with("--")) + .map(|arg| arg.to_string()) + .collect::>() + } else { + vec![] + }; + + Self { + filters, + complete_mode, + } + } + + /// Because this test can be run as a cargo test, commands like + /// + /// ```shell + /// cargo test foo + /// ``` + /// + /// Will end up passing `foo` as a command line argument. + /// + /// To be compatible with this, treat the command line arguments as a + /// filter and that does a substring match on each input. returns + /// true f this path should be run + fn check_test_file(&self, path: &Path) -> bool { + if self.filters.is_empty() { + return true; + } + + // otherwise check if any filter matches + let path_str = path.to_string_lossy(); + self.filters.iter().any(|filter| path_str.contains(filter)) + } +} diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt index 90241e3e8a6a..7a1b012b8410 100644 --- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt +++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt @@ -1077,3 +1077,17 @@ b 5 c 4 d 4 e 4 + + + +# array_agg_zero +query U +SELECT ARRAY_AGG([]); +---- +[] + +# array_agg_one +query U +SELECT ARRAY_AGG([1]); +---- +[[1]] diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt index 7445157eec14..2e0667b5807c 100644 --- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt +++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt @@ -411,5 +411,11 @@ SELECT * FROM aggregate_simple order by c1 LIMIT 1; ---- 0.00001 0.000000000001 true +query CCC +SELECT * FROM aggregate_simple order by c1 DESC LIMIT 1; +---- +0.00005 0.000000000005 true + + statement ok DROP TABLE aggregate_simple From 0d1e27ac37a5e10dbd2d22a3b9da32e7f17a40cd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 18 Dec 2022 07:49:21 -0500 Subject: [PATCH 2/3] clippy --- datafusion/core/tests/sqllogictests/src/main.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs index 9a4d5bbac6bb..b89edcc56369 100644 --- a/datafusion/core/tests/sqllogictests/src/main.rs +++ b/datafusion/core/tests/sqllogictests/src/main.rs @@ -184,7 +184,6 @@ impl Options { .skip(1) // ignore command line arguments like `--complete` .filter(|arg| !arg.as_str().starts_with("--")) - .map(|arg| arg.to_string()) .collect::>() } else { vec![] From 79f134592be27c162eaa87b47af93d8030553f31 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 22 Dec 2022 06:17:52 -0500 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: xudong.w --- datafusion/core/tests/sqllogictests/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md index 15a4e2834193..d472245783fa 100644 --- a/datafusion/core/tests/sqllogictests/README.md +++ b/datafusion/core/tests/sqllogictests/README.md @@ -23,7 +23,7 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/s #### Running tests: Validation Mode -In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the fule to the output produced by that run. +In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the file to the output produced by that run. Run all tests suites in validation mode @@ -52,7 +52,7 @@ You can update tests by passing the `--complete` argument. ```shell # Update ddl.slt with output from running -cargo test -p datafusion --test sqllogictests -- ddl +cargo test -p datafusion --test sqllogictests -- ddl --complete ``` #### sqllogictests