Skip to content

Test for parse(unparse(parse()) is identity #129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sqllogictest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ description = "Sqllogictest parser and runner."

[dependencies]
async-trait = "0.1"
derivative = "2.2.0"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use derivative as suggested by @xxchan

difference = "2.0"
glob = "0.3"
humantime = "2"
Expand Down
160 changes: 103 additions & 57 deletions sqllogictest/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
//! Sqllogictest parser.

use derivative::Derivative;
use std::fmt;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;

use itertools::Itertools;
use regex::Regex;

use crate::ColumnType;
use crate::ParseErrorKind::InvalidIncludeFile;
use itertools::Itertools;
use regex::Regex;

/// The location in source file.
#[derive(Debug, PartialEq, Eq, Clone)]
Expand Down Expand Up @@ -66,7 +65,8 @@ impl Location {
}

/// A single directive in a sqllogictest file.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Derivative)]
#[derivative(PartialEq)]
#[non_exhaustive]
pub enum Record {
/// An include copies all records from another files.
Expand All @@ -81,6 +81,7 @@ pub enum Record {
conditions: Vec<Condition>,
/// The SQL command is expected to fail with an error messages that matches the given
/// regex. If the regex is an empty string, any error message is accepted.
#[derivative(PartialEq(compare_with = "cmp_regex"))]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 much nicer

expected_error: Option<Regex>,
/// The SQL command.
sql: String,
Expand All @@ -97,6 +98,7 @@ pub enum Record {
label: Option<String>,
/// The SQL command is expected to fail with an error messages that matches the given
/// regex. If the regex is an empty string, any error message is accepted.
#[derivative(PartialEq(compare_with = "cmp_regex"))]
expected_error: Option<Regex>,
/// The SQL command.
sql: String,
Expand Down Expand Up @@ -137,6 +139,17 @@ pub enum Record {
Injected(Injected),
}

/// Use string representation to determine if two regular
/// expressions came from the same text (rather than something
/// more deeply meaningful)
fn cmp_regex(l: &Option<Regex>, r: &Option<Regex>) -> bool {
match (l, r) {
(Some(l), Some(r)) => l.as_str().eq(r.as_str()),
(None, None) => true,
_ => false,
}
}

impl Record {
/// Unparses the record to its string representation in the test file.
///
Expand Down Expand Up @@ -617,7 +630,7 @@ fn parse_file_inner(loc: Location) -> Result<Vec<Record>, ParseError> {

#[cfg(test)]
mod tests {
use difference::{Changeset, Difference};
use std::io::Write;

use super::*;

Expand All @@ -632,72 +645,105 @@ mod tests {
parse_roundtrip("../examples/basic/basic.slt")
}

/// Parses the specified file into Records, and ensures the
/// results of unparsing them are the same
///
/// Prints a hopefully useful message on failure
#[test]
fn test_condition() {
parse_roundtrip("../examples/condition/condition.slt")
}

#[test]
fn test_file_level_sort_mode() {
parse_roundtrip("../examples/file_level_sort_mode/file_level_sort_mode.slt")
}

#[test]
fn test_rowsort() {
parse_roundtrip("../examples/rowsort/rowsort.slt")
}

#[test]
fn test_test_dir_escape() {
parse_roundtrip("../examples/test_dir_escape/test_dir_escape.slt")
}

#[test]
fn test_validator() {
parse_roundtrip("../examples/validator/validator.slt")
}

/// Verifies Display impl is consistent with parsing by ensuring
Copy link
Contributor Author

@alamb alamb Dec 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the "display roundtrip check" (I will try and propose a more specific test in a follow on PR)

/// roundtrip parse(unparse(parse())) is consistent
fn parse_roundtrip(filename: impl AsRef<Path>) {
let filename = filename.as_ref();
let input_contents = std::fs::read_to_string(filename).expect("reading file");

let records = parse_file(filename).expect("parsing to complete");

let unparsed = records
.iter()
.map(|record| record.to_string())
.collect::<Vec<_>>();

// Technically this will not always be the same due to some whitespace normalization
//
// query III
// select * from foo;
// ----
// 1 2
//
// Will print out collaposting the spaces between `query`
//
// query III
// select * from foo;
// ----
// 1 2
let output_contents = unparsed.join("\n");

let changeset = Changeset::new(&input_contents, &output_contents, "\n");

assert!(
no_diffs(&changeset),
"Mismatch for {:?}\n\
*********\n\
diff:\n\
*********\n\
{}\n\n\
*********\n\
output:\n\
*********\n\
{}\n\n",
filename,
UsefulDiffDisplay(&changeset),
output_contents,
// The orignal and parsed records should be logically requivalent
let mut output_file = tempfile::NamedTempFile::new().expect("Error creating tempfile");
output_file
.write_all(output_contents.as_bytes())
.expect("Unable to write file");
output_file.flush().unwrap();

let output_path = output_file.into_temp_path();
let reparsed_records =
parse_file(&output_path).expect("reparsing to complete successfully");

let records = normalize_filename(records);
let reparsed_records = normalize_filename(reparsed_records);

assert_eq!(
records, reparsed_records,
"Mismatch in reparsed records\n\
*********\n\
original:\n\
*********\n\
{:#?}\n\n\
*********\n\
reparsed:\n\
*********\n\
{:#?}\n\n",
records, reparsed_records,
);
}

/// returns true if there are no differences in the changeset
fn no_diffs(changeset: &Changeset) -> bool {
changeset
.diffs
.iter()
.all(|diff| matches!(diff, Difference::Same(_)))
/// Replaces the actual filename in all Records with
/// "__FILENAME__" so different files with the same contents can
/// compare equal
fn normalize_filename(records: Vec<Record>) -> Vec<Record> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am also not a super fan of this patter either, but it is what I can come up with

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about using derivative crate to ignore a field when comparing? https://mcarton.github.io/rust-derivative/latest/cmp.html#ignoring-a-field

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll give it a try 👍

records
.into_iter()
.map(|mut record| {
match &mut record {
Record::Include { loc, .. } => normalize_loc(loc),
Record::Statement { loc, .. } => normalize_loc(loc),
Record::Query { loc, .. } => normalize_loc(loc),
Record::Sleep { loc, .. } => normalize_loc(loc),
Record::Subtest { loc, .. } => normalize_loc(loc),
Record::Halt { loc, .. } => normalize_loc(loc),
Record::HashThreshold { loc, .. } => normalize_loc(loc),
// even though these variants don't include a
// location include them in this match statement
// so if new variants are added, this match
// statement must be too.
Record::Condition(_)
| Record::Comment(_)
| Record::Control(_)
| Record::Newline
| Record::Injected(_) => {}
};
record
})
.collect()
}

struct UsefulDiffDisplay<'a>(&'a Changeset);

impl<'a> std::fmt::Display for UsefulDiffDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.diffs.iter().try_for_each(|diff| match diff {
Difference::Same(x) => writeln!(f, "{x}"),
Difference::Add(x) => writeln!(f, "+ {x}"),
Difference::Rem(x) => writeln!(f, "- {x}"),
})
}
// Normalize a location
fn normalize_loc(loc: &mut Location) {
loc.file = Arc::from("__FILENAME__");
}
}