From 71b97de5e9f188f946e653a5c8d78ed9b5da270a Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 18 Dec 2022 07:18:36 -0500
Subject: [PATCH 1/3] Add `--complete` auto completion mode to `sqllogictests`

---
 datafusion/core/Cargo.toml                    |   2 +-
 datafusion/core/tests/sql/aggregates.rs       |  34 -----
 datafusion/core/tests/sqllogictests/README.md |  17 ++-
 .../core/tests/sqllogictests/src/error.rs     |   9 ++
 .../core/tests/sqllogictests/src/main.rs      | 142 +++++++++++-------
 .../sqllogictests/test_files/aggregate.slt    |  14 ++
 .../tests/sqllogictests/test_files/ddl.slt    |   6 +
 7 files changed, 131 insertions(+), 93 deletions(-)

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 898237a83589..18f5901689bd 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -109,7 +109,7 @@ doc-comment = "0.3"
 env_logger = "0.10"
 parquet-test-utils = { path = "../../parquet-test-utils" }
 rstest = "0.16.0"
-sqllogictest = "0.9.0"
+sqllogictest = "0.10.0"
 sqlparser = "0.28"
 test-utils = { path = "../../test-utils" }
 thiserror = "1.0.37"
diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs
index 2dd3a8dec8d3..dcd30909a935 100644
--- a/datafusion/core/tests/sql/aggregates.rs
+++ b/datafusion/core/tests/sql/aggregates.rs
@@ -1079,37 +1079,3 @@ async fn aggregate_with_alias() -> Result<()> {
     );
     Ok(())
 }
-
-#[tokio::test]
-async fn array_agg_zero() -> Result<()> {
-    let ctx = SessionContext::new();
-    // 2 different aggregate functions: avg and sum(distinct)
-    let sql = "SELECT ARRAY_AGG([]);";
-    let actual = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+------------------------+",
-        "| ARRAYAGG(List([NULL])) |",
-        "+------------------------+",
-        "| []                     |",
-        "+------------------------+",
-    ];
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
-
-#[tokio::test]
-async fn array_agg_one() -> Result<()> {
-    let ctx = SessionContext::new();
-    // 2 different aggregate functions: avg and sum(distinct)
-    let sql = "SELECT ARRAY_AGG([1]);";
-    let actual = execute_to_batches(&ctx, sql).await;
-    let expected = vec![
-        "+---------------------+",
-        "| ARRAYAGG(List([1])) |",
-        "+---------------------+",
-        "| [[1]]               |",
-        "+---------------------+",
-    ];
-    assert_batches_eq!(expected, &actual);
-    Ok(())
-}
diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md
index 648e0a3eaacb..15a4e2834193 100644
--- a/datafusion/core/tests/sqllogictests/README.md
+++ b/datafusion/core/tests/sqllogictests/README.md
@@ -21,7 +21,11 @@
 
 This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki). We use [sqllogictest-rs](https://github.com/risinglightdb/sqllogictest-rs) as a parser/runner of `.slt` files in `test_files`.
 
-#### Running tests
+#### Running tests: Validation Mode
+
+In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the fule to the output produced by that run.
+
+Run all tests suites in validation mode
 
 ```shell
 cargo test -p datafusion --test sqllogictests
@@ -40,6 +44,17 @@ Run only the tests in `information_schema.slt`:
 cargo test -p datafusion --test sqllogictests -- information
 ```
 
+#### Updating tests: Completion Mode
+
+In test script completion mode, `sqllogictests` reads a prototype script and runs the statements and queries against the database engine. The output is is a full script that is a copy of the prototype script with result inserted.
+
+You can update tests by passing the `--complete` argument.
+
+```shell
+# Update ddl.slt with output from running
+cargo test -p datafusion --test sqllogictests -- ddl
+```
+
 #### sqllogictests
 
 > :warning: **Warning**:Datafusion's sqllogictest implementation and migration is still in progress. Definitions taken from https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
diff --git a/datafusion/core/tests/sqllogictests/src/error.rs b/datafusion/core/tests/sqllogictests/src/error.rs
index d645e054b185..a7f252fe71ea 100644
--- a/datafusion/core/tests/sqllogictests/src/error.rs
+++ b/datafusion/core/tests/sqllogictests/src/error.rs
@@ -38,6 +38,9 @@ pub enum DFSqlLogicTestError {
     /// Error from arrow-rs
     #[error("Arrow error: {0}")]
     Arrow(ArrowError),
+    /// Generic error
+    #[error("Other Error: {0}")]
+    Other(String),
 }
 
 impl From<TestError> for DFSqlLogicTestError {
@@ -63,3 +66,9 @@ impl From<ArrowError> for DFSqlLogicTestError {
         DFSqlLogicTestError::Arrow(value)
     }
 }
+
+impl From<String> for DFSqlLogicTestError {
+    fn from(value: String) -> Self {
+        DFSqlLogicTestError::Other(value)
+    }
+}
diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
index e8c44babe98f..9a4d5bbac6bb 100644
--- a/datafusion/core/tests/sqllogictests/src/main.rs
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -23,7 +23,7 @@ use log::info;
 use normalize::convert_batches;
 use sqllogictest::DBOutput;
 use sqlparser::ast::Statement as SQLStatement;
-use std::path::{Path, PathBuf};
+use std::path::Path;
 use std::time::Duration;
 
 use crate::error::{DFSqlLogicTestError, Result};
@@ -78,16 +78,19 @@ pub async fn main() -> Result<()> {
 #[cfg(not(target_family = "windows"))]
 pub async fn main() -> Result<()> {
     // Enable logging (e.g. set RUST_LOG=debug to see debug logs)
+
+    use sqllogictest::{default_validator, update_test_file};
     env_logger::init();
 
-    // run each file using its own new DB
-    //
-    // Note: can't use tester.run_parallel_async()
-    // as that will reuse the same SessionContext
-    //
-    // We could run these tests in parallel eventually if we wanted.
+    let options = Options::new();
+
+    // default to all files in test directory filtering based on name
+    let files: Vec<_> = std::fs::read_dir(TEST_DIRECTORY)
+        .unwrap()
+        .map(|path| path.unwrap().path())
+        .filter(|path| options.check_test_file(path.as_path()))
+        .collect();
 
-    let files = get_test_files();
     info!("Running test files {:?}", files);
 
     for path in files {
@@ -95,61 +98,29 @@ pub async fn main() -> Result<()> {
 
         let file_name = path.file_name().unwrap().to_str().unwrap().to_string();
 
+        // Create the test runner
         let ctx = context_for_test_file(&file_name).await;
-
-        let mut tester = sqllogictest::Runner::new(DataFusion { ctx, file_name });
-        tester.run_file_async(path).await?;
+        let mut runner = sqllogictest::Runner::new(DataFusion { ctx, file_name });
+
+        // run each file using its own new DB
+        //
+        // We could run these tests in parallel eventually if we wanted.
+        if options.complete_mode {
+            info!("Using complete mode to complete {}", path.display());
+            let col_separator = " ";
+            let validator = default_validator;
+            update_test_file(path, runner, col_separator, validator)
+                .await
+                .map_err(|e| e.to_string())?;
+        } else {
+            // run the test normally:
+            runner.run_file_async(path).await?;
+        }
     }
 
     Ok(())
 }
 
-/// Gets a list of test files to execute. If there were arguments
-/// passed to the program treat it as a cargo test filter (substring match on filenames)
-fn get_test_files() -> Vec<PathBuf> {
-    info!("Test directory: {}", TEST_DIRECTORY);
-
-    let args: Vec<_> = std::env::args().collect();
-
-    // treat args after the first as filters to run (substring matching)
-    let filters = if !args.is_empty() {
-        args.iter()
-            .skip(1)
-            .map(|arg| arg.as_str())
-            .collect::<Vec<_>>()
-    } else {
-        vec![]
-    };
-
-    // default to all files in test directory filtering based on name
-    std::fs::read_dir(TEST_DIRECTORY)
-        .unwrap()
-        .map(|path| path.unwrap().path())
-        .filter(|path| check_test_file(&filters, path.as_path()))
-        .collect()
-}
-
-/// because this test can be run as a cargo test, commands like
-///
-/// ```shell
-/// cargo test foo
-/// ```
-///
-/// Will end up passing `foo` as a command line argument.
-///
-/// be compatible with this, treat the command line arguments as a
-/// filter and that does a substring match on each input.
-/// returns true f this path should be run
-fn check_test_file(filters: &[&str], path: &Path) -> bool {
-    if filters.is_empty() {
-        return true;
-    }
-
-    // otherwise check if any filter matches
-    let path_str = path.to_string_lossy();
-    filters.iter().any(|filter| path_str.contains(filter))
-}
-
 /// Create a SessionContext, configured for the specific test
 async fn context_for_test_file(file_name: &str) -> SessionContext {
     match file_name {
@@ -189,3 +160,60 @@ async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DBOut
     let formatted_batches = convert_batches(results)?;
     Ok(formatted_batches)
 }
+
+/// Parsed command line options
+struct Options {
+    // regex like
+    /// arguments passed to the program which are treated as
+    /// cargo test filter (substring match on filenames)
+    filters: Vec<String>,
+
+    /// Auto complete mode to fill out expected results
+    complete_mode: bool,
+}
+
+impl Options {
+    fn new() -> Self {
+        let args: Vec<_> = std::env::args().collect();
+
+        let complete_mode = args.iter().any(|a| a == "--complete");
+
+        // treat args after the first as filters to run (substring matching)
+        let filters = if !args.is_empty() {
+            args.into_iter()
+                .skip(1)
+                // ignore command line arguments like `--complete`
+                .filter(|arg| !arg.as_str().starts_with("--"))
+                .map(|arg| arg.to_string())
+                .collect::<Vec<_>>()
+        } else {
+            vec![]
+        };
+
+        Self {
+            filters,
+            complete_mode,
+        }
+    }
+
+    /// Because this test can be run as a cargo test, commands like
+    ///
+    /// ```shell
+    /// cargo test foo
+    /// ```
+    ///
+    /// Will end up passing `foo` as a command line argument.
+    ///
+    /// To be compatible with this, treat the command line arguments as a
+    /// filter and that does a substring match on each input.  returns
+    /// true f this path should be run
+    fn check_test_file(&self, path: &Path) -> bool {
+        if self.filters.is_empty() {
+            return true;
+        }
+
+        // otherwise check if any filter matches
+        let path_str = path.to_string_lossy();
+        self.filters.iter().any(|filter| path_str.contains(filter))
+    }
+}
diff --git a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
index 90241e3e8a6a..7a1b012b8410 100644
--- a/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/aggregate.slt
@@ -1077,3 +1077,17 @@ b 5
 c 4
 d 4
 e 4
+
+
+
+# array_agg_zero
+query U
+SELECT ARRAY_AGG([]);
+----
+[]
+
+# array_agg_one
+query U
+SELECT ARRAY_AGG([1]);
+----
+[[1]]
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 7445157eec14..2e0667b5807c 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -411,5 +411,11 @@ SELECT * FROM aggregate_simple order by c1 LIMIT 1;
 ----
 0.00001 0.000000000001 true
 
+query CCC
+SELECT * FROM aggregate_simple order by c1 DESC LIMIT 1;
+----
+0.00005 0.000000000005 true
+
+
 statement ok
 DROP TABLE aggregate_simple

From 0d1e27ac37a5e10dbd2d22a3b9da32e7f17a40cd Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 18 Dec 2022 07:49:21 -0500
Subject: [PATCH 2/3] clippy

---
 datafusion/core/tests/sqllogictests/src/main.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datafusion/core/tests/sqllogictests/src/main.rs b/datafusion/core/tests/sqllogictests/src/main.rs
index 9a4d5bbac6bb..b89edcc56369 100644
--- a/datafusion/core/tests/sqllogictests/src/main.rs
+++ b/datafusion/core/tests/sqllogictests/src/main.rs
@@ -184,7 +184,6 @@ impl Options {
                 .skip(1)
                 // ignore command line arguments like `--complete`
                 .filter(|arg| !arg.as_str().starts_with("--"))
-                .map(|arg| arg.to_string())
                 .collect::<Vec<_>>()
         } else {
             vec![]

From 79f134592be27c162eaa87b47af93d8030553f31 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 22 Dec 2022 06:17:52 -0500
Subject: [PATCH 3/3] Apply suggestions from code review

Co-authored-by: xudong.w <wxd963996380@gmail.com>
---
 datafusion/core/tests/sqllogictests/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/README.md b/datafusion/core/tests/sqllogictests/README.md
index 15a4e2834193..d472245783fa 100644
--- a/datafusion/core/tests/sqllogictests/README.md
+++ b/datafusion/core/tests/sqllogictests/README.md
@@ -23,7 +23,7 @@ This is the Datafusion implementation of [sqllogictest](https://www.sqlite.org/s
 
 #### Running tests: Validation Mode
 
-In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the fule to the output produced by that run.
+In this model, `sqllogictests` runs the statements and queries in a `.slt` file, comparing the expected output in the file to the output produced by that run.
 
 Run all tests suites in validation mode
 
@@ -52,7 +52,7 @@ You can update tests by passing the `--complete` argument.
 
 ```shell
 # Update ddl.slt with output from running
-cargo test -p datafusion --test sqllogictests -- ddl
+cargo test -p datafusion --test sqllogictests -- ddl --complete
 ```
 
 #### sqllogictests