Skip to content

Commit cd96b26

Browse files
authored
Migrate subtrait tests to insta, part1 (#15444)
* add `cargo insta` to dev dependencies * migrate `consumer_intergration.rs` tests to `insta` * Revert "migrate `consumer_intergration.rs` tests to `insta`" This reverts commit c3be2eb. * migrate `consumer_integration.rs` to `insta` inline snapshot * migrate logical plans tests to use `insta` snapshots * migrate emit_kind_tests to use `insta` snapshots * migrate function_test to use `insta` snapshots for assertions * migrate substrait_validations tests to use insta snapshots, missing `insta` mapping to `assert!` * revert `handle_emit_as_project_without_volatile_exprs` back to `assert_eq!` and remove `format!` for `assert_snapshot!` * migrate function and validation tests to use plan directly in assert_snapshot! * migrate serialize tests to use insta snapshots for assertions * migrate logical_plans test to use insta snapshots for assertions
1 parent 0f7b1c5 commit cd96b26

8 files changed

+462
-383
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/substrait/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ datafusion = { workspace = true, features = ["nested_expressions"] }
4848
datafusion-functions-aggregate = { workspace = true }
4949
serde_json = "1.0"
5050
tokio = { workspace = true }
51+
insta = { workspace = true }
5152

5253
[features]
5354
default = ["physical"]

datafusion/substrait/tests/cases/consumer_integration.rs

+323-285
Large diffs are not rendered by default.

datafusion/substrait/tests/cases/emit_kind_tests.rs

+40-32
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ mod tests {
2626
use datafusion::prelude::{CsvReadOptions, SessionConfig, SessionContext};
2727
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
2828
use datafusion_substrait::logical_plan::producer::to_substrait_plan;
29+
use insta::assert_snapshot;
2930

3031
#[tokio::test]
3132
async fn project_respects_direct_emit_kind() -> Result<()> {
@@ -35,13 +36,13 @@ mod tests {
3536
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
3637
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
3738

38-
let plan_str = format!("{}", plan);
39-
40-
assert_eq!(
41-
plan_str,
42-
"Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1\
43-
\n TableScan: DATA"
44-
);
39+
assert_snapshot!(
40+
plan,
41+
@r#"
42+
Projection: DATA.A AS a, DATA.B AS b, DATA.A + Int64(1) AS add1
43+
TableScan: DATA
44+
"#
45+
);
4546
Ok(())
4647
}
4748

@@ -53,15 +54,15 @@ mod tests {
5354
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
5455
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
5556

56-
let plan_str = format!("{}", plan);
57-
58-
assert_eq!(
59-
plan_str,
60-
// Note that duplicate references in the remap are aliased
61-
"Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2\
62-
\n Filter: DATA.B = Int64(2)\
63-
\n TableScan: DATA"
64-
);
57+
assert_snapshot!(
58+
plan,
59+
// Note that duplicate references in the remap are aliased
60+
@r#"
61+
Projection: DATA.B, DATA.A AS A1, DATA.A AS DATA.A__temp__0 AS A2
62+
Filter: DATA.B = Int64(2)
63+
TableScan: DATA
64+
"#
65+
);
6566
Ok(())
6667
}
6768

@@ -85,21 +86,24 @@ mod tests {
8586
.await?;
8687

8788
let plan = df.into_unoptimized_plan();
88-
assert_eq!(
89-
format!("{}", plan),
90-
"Projection: random() AS c1, data.a + Int64(1) AS c2\
91-
\n TableScan: data"
92-
);
89+
assert_snapshot!(
90+
plan,
91+
@r#"
92+
Projection: random() AS c1, data.a + Int64(1) AS c2
93+
TableScan: data
94+
"# );
9395

9496
let proto = to_substrait_plan(&plan, &ctx.state())?;
9597
let plan2 = from_substrait_plan(&ctx.state(), &proto).await?;
9698
// note how the Projections are not flattened
97-
assert_eq!(
98-
format!("{}", plan2),
99-
"Projection: random() AS c1, data.a + Int64(1) AS c2\
100-
\n Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1)\
101-
\n TableScan: data"
102-
);
99+
assert_snapshot!(
100+
plan2,
101+
@r#"
102+
Projection: random() AS c1, data.a + Int64(1) AS c2
103+
Projection: data.a, data.b, data.c, data.d, data.e, data.f, random(), data.a + Int64(1)
104+
TableScan: data
105+
"#
106+
);
103107
Ok(())
104108
}
105109

@@ -109,17 +113,21 @@ mod tests {
109113
let df = ctx.sql("SELECT a + 1, b + 2 FROM data").await?;
110114

111115
let plan = df.into_unoptimized_plan();
112-
assert_eq!(
113-
format!("{}", plan),
114-
"Projection: data.a + Int64(1), data.b + Int64(2)\
115-
\n TableScan: data"
116-
);
116+
assert_snapshot!(
117+
plan,
118+
@r#"
119+
Projection: data.a + Int64(1), data.b + Int64(2)
120+
TableScan: data
121+
"#
122+
);
117123

118124
let proto = to_substrait_plan(&plan, &ctx.state())?;
119125
let plan2 = from_substrait_plan(&ctx.state(), &proto).await?;
120126

121127
let plan1str = format!("{plan}");
122128
let plan2str = format!("{plan2}");
129+
println!("{}", plan1str);
130+
println!("{}", plan2str);
123131
assert_eq!(plan1str, plan2str);
124132

125133
Ok(())

datafusion/substrait/tests/cases/function_test.rs

+9-8
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,22 @@ mod tests {
2424
use datafusion::common::Result;
2525
use datafusion::prelude::SessionContext;
2626
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
27+
use insta::assert_snapshot;
2728

2829
#[tokio::test]
2930
async fn contains_function_test() -> Result<()> {
3031
let proto_plan = read_json("tests/testdata/contains_plan.substrait.json");
3132
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
3233
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
3334

34-
let plan_str = format!("{}", plan);
35-
36-
assert_eq!(
37-
plan_str,
38-
"Projection: nation.n_name\
39-
\n Filter: contains(nation.n_name, Utf8(\"IA\"))\
40-
\n TableScan: nation"
41-
);
35+
assert_snapshot!(
36+
plan,
37+
@r#"
38+
Projection: nation.n_name
39+
Filter: contains(nation.n_name, Utf8("IA"))
40+
TableScan: nation
41+
"#
42+
);
4243
Ok(())
4344
}
4445
}

datafusion/substrait/tests/cases/logical_plans.rs

+48-32
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ mod tests {
2424
use datafusion::dataframe::DataFrame;
2525
use datafusion::prelude::SessionContext;
2626
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
27+
use insta::assert_snapshot;
2728

2829
#[tokio::test]
2930
async fn scalar_function_compound_signature() -> Result<()> {
@@ -40,11 +41,13 @@ mod tests {
4041
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
4142
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
4243

43-
assert_eq!(
44-
format!("{}", plan),
45-
"Projection: NOT DATA.D AS EXPR$0\
46-
\n TableScan: DATA"
47-
);
44+
assert_snapshot!(
45+
plan,
46+
@r#"
47+
Projection: NOT DATA.D AS EXPR$0
48+
TableScan: DATA
49+
"#
50+
);
4851

4952
// Trigger execution to ensure plan validity
5053
DataFrame::new(ctx.state(), plan).show().await?;
@@ -69,12 +72,14 @@ mod tests {
6972
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
7073
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
7174

72-
assert_eq!(
73-
format!("{}", plan),
74-
"Projection: sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR\
75-
\n WindowAggr: windowExpr=[[sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]\
76-
\n TableScan: DATA"
77-
);
75+
assert_snapshot!(
76+
plan,
77+
@r#"
78+
Projection: sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING AS LEAD_EXPR
79+
WindowAggr: windowExpr=[[sum(DATA.D) PARTITION BY [DATA.PART] ORDER BY [DATA.ORD ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]]
80+
TableScan: DATA
81+
"#
82+
);
7883

7984
// Trigger execution to ensure plan validity
8085
DataFrame::new(ctx.state(), plan).show().await?;
@@ -94,12 +99,14 @@ mod tests {
9499
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
95100
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
96101

97-
assert_eq!(
98-
format!("{}", plan),
99-
"Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW__temp__0 AS ALIASED\
100-
\n WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
101-
\n TableScan: DATA"
102-
);
102+
assert_snapshot!(
103+
plan,
104+
@r#"
105+
Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW__temp__0 AS ALIASED
106+
WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
107+
TableScan: DATA
108+
"#
109+
);
103110

104111
// Trigger execution to ensure plan validity
105112
DataFrame::new(ctx.state(), plan).show().await?;
@@ -121,13 +128,15 @@ mod tests {
121128
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
122129
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
123130

124-
assert_eq!(
125-
format!("{}", plan),
126-
"Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$1\
127-
\n WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
128-
\n WindowAggr: windowExpr=[[row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
129-
\n TableScan: DATA"
130-
);
131+
assert_snapshot!(
132+
plan,
133+
@r#"
134+
Projection: row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$0, row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS EXPR$1
135+
WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
136+
WindowAggr: windowExpr=[[row_number() PARTITION BY [DATA.A] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
137+
TableScan: DATA
138+
"#
139+
);
131140

132141
// Trigger execution to ensure plan validity
133142
DataFrame::new(ctx.state(), plan).show().await?;
@@ -145,7 +154,12 @@ mod tests {
145154
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
146155
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
147156

148-
assert_eq!(format!("{}", &plan), "Values: (List([1, 2]))");
157+
assert_snapshot!(
158+
&plan,
159+
@r#"
160+
Values: (List([1, 2]))
161+
"#
162+
);
149163

150164
// Trigger execution to ensure plan validity
151165
DataFrame::new(ctx.state(), plan).show().await?;
@@ -160,13 +174,15 @@ mod tests {
160174
let ctx = add_plan_schemas_to_ctx(SessionContext::new(), &proto_plan)?;
161175
let plan = from_substrait_plan(&ctx.state(), &proto_plan).await?;
162176

163-
assert_eq!(
164-
format!("{}", plan),
165-
"Projection: lower(sales.product) AS lower(product), sum(count(sales.product)) AS product_count\
166-
\n Aggregate: groupBy=[[sales.product]], aggr=[[sum(count(sales.product))]]\
167-
\n Aggregate: groupBy=[[sales.product]], aggr=[[count(sales.product)]]\
168-
\n TableScan: sales"
169-
);
177+
assert_snapshot!(
178+
plan,
179+
@r#"
180+
Projection: lower(sales.product) AS lower(product), sum(count(sales.product)) AS product_count
181+
Aggregate: groupBy=[[sales.product]], aggr=[[sum(count(sales.product))]]
182+
Aggregate: groupBy=[[sales.product]], aggr=[[count(sales.product)]]
183+
TableScan: sales
184+
"#
185+
);
170186

171187
// Trigger execution to ensure plan validity
172188
DataFrame::new(ctx.state(), plan).show().await?;

datafusion/substrait/tests/cases/serialize.rs

+18-11
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ mod tests {
2727
use datafusion::error::Result;
2828
use datafusion::prelude::*;
2929

30+
use insta::assert_snapshot;
3031
use std::fs;
3132
use substrait::proto::plan_rel::RelType;
3233
use substrait::proto::rel_common::{Emit, EmitKind};
@@ -92,11 +93,14 @@ mod tests {
9293
let df = ctx.sql("SELECT b, a + a, a FROM data").await?;
9394
let datafusion_plan = df.into_optimized_plan()?;
9495

95-
assert_eq!(
96-
format!("{}", datafusion_plan),
97-
"Projection: data.b, data.a + data.a, data.a\
98-
\n TableScan: data projection=[a, b]",
99-
);
96+
assert_snapshot!(
97+
format!("{}", datafusion_plan),
98+
@r#"
99+
Projection: data.b, data.a + data.a, data.a
100+
TableScan: data projection=[a, b]
101+
"#
102+
,
103+
);
100104

101105
let plan = to_substrait_plan(&datafusion_plan, &ctx.state())?
102106
.as_ref()
@@ -136,12 +140,15 @@ mod tests {
136140
.sql("SELECT b, RANK() OVER (PARTITION BY a), c FROM data;")
137141
.await?;
138142
let datafusion_plan = df.into_optimized_plan()?;
139-
assert_eq!(
140-
format!("{}", datafusion_plan),
141-
"Projection: data.b, rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, data.c\
142-
\n WindowAggr: windowExpr=[[rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]\
143-
\n TableScan: data projection=[a, b, c]",
144-
);
143+
assert_snapshot!(
144+
datafusion_plan,
145+
@r#"
146+
Projection: data.b, rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, data.c
147+
WindowAggr: windowExpr=[[rank() PARTITION BY [data.a] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
148+
TableScan: data projection=[a, b, c]
149+
"#
150+
,
151+
);
145152

146153
let plan = to_substrait_plan(&datafusion_plan, &ctx.state())?
147154
.as_ref()

0 commit comments

Comments
 (0)