From 86ca11a97d84ddd5ed5b75a329a3a5c09957f547 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Thu, 13 Mar 2025 17:06:50 +0000 Subject: [PATCH 1/5] Add SessionConfig reference to ScalarFunctionArgs --- .../examples/composed_extension_codec.rs | 4 +- datafusion-examples/examples/expr_api.rs | 39 ++-- datafusion-examples/examples/planner_api.rs | 18 +- datafusion-examples/examples/pruning.rs | 5 +- datafusion-examples/examples/simple_udtf.rs | 4 +- datafusion/catalog-listing/src/helpers.rs | 34 ++- datafusion/common/src/config.rs | 16 +- datafusion/core/Cargo.toml | 2 +- .../core/src/datasource/listing/table.rs | 7 + datafusion/core/src/datasource/memory.rs | 3 + datafusion/core/src/execution/context/mod.rs | 2 + .../core/src/execution/session_state.rs | 70 ++++-- datafusion/core/src/physical_planner.rs | 221 +++++++++++++++--- datafusion/core/src/test_util/parquet.rs | 12 +- datafusion/core/tests/expr_api/mod.rs | 4 +- .../core/tests/expr_api/simplification.rs | 16 +- .../tests/fuzz_cases/equivalence/ordering.rs | 2 + .../fuzz_cases/equivalence/projection.rs | 3 + .../fuzz_cases/equivalence/properties.rs | 2 + datafusion/core/tests/parquet/page_pruning.rs | 8 +- .../aggregate_statistics.rs | 12 +- .../combine_partial_final_agg.rs | 3 +- .../physical_optimizer/enforce_sorting.rs | 5 +- .../physical_optimizer/join_selection.rs | 19 +- .../physical_optimizer/limit_pushdown.rs | 42 ++-- .../physical_optimizer/projection_pushdown.rs | 76 +++--- datafusion/datasource/src/file_scan_config.rs | 12 +- datafusion/expr-common/src/accumulator.rs | 4 +- datafusion/expr/src/simplify.rs | 19 +- datafusion/expr/src/udf.rs | 3 + datafusion/ffi/src/plan_properties.rs | 6 +- datafusion/ffi/src/udf.rs | 9 +- datafusion/functions-nested/benches/map.rs | 12 +- .../functions/benches/character_length.rs | 7 + datafusion/functions/benches/chr.rs | 4 + datafusion/functions/benches/concat.rs | 4 + datafusion/functions/benches/cot.rs | 7 +- datafusion/functions/benches/date_bin.rs | 8 +- 
datafusion/functions/benches/date_trunc.rs | 9 +- datafusion/functions/benches/encoding.rs | 7 + datafusion/functions/benches/find_in_set.rs | 6 + datafusion/functions/benches/gcd.rs | 5 + datafusion/functions/benches/initcap.rs | 6 + datafusion/functions/benches/isnan.rs | 5 + datafusion/functions/benches/iszero.rs | 5 + datafusion/functions/benches/lower.rs | 9 + datafusion/functions/benches/ltrim.rs | 4 + datafusion/functions/benches/make_date.rs | 14 +- datafusion/functions/benches/nullif.rs | 4 + datafusion/functions/benches/pad.rs | 9 + datafusion/functions/benches/random.rs | 4 + datafusion/functions/benches/repeat.rs | 10 + datafusion/functions/benches/reverse.rs | 8 + datafusion/functions/benches/signum.rs | 5 + datafusion/functions/benches/strpos.rs | 7 +- datafusion/functions/benches/substr.rs | 12 + datafusion/functions/benches/substr_index.rs | 10 +- datafusion/functions/benches/to_char.rs | 7 + datafusion/functions/benches/to_hex.rs | 5 + datafusion/functions/benches/to_timestamp.rs | 17 +- datafusion/functions/benches/trunc.rs | 5 + datafusion/functions/benches/upper.rs | 4 + datafusion/functions/benches/uuid.rs | 4 + .../functions/src/core/union_extract.rs | 5 + datafusion/functions/src/core/version.rs | 2 + datafusion/functions/src/datetime/date_bin.rs | 17 ++ .../functions/src/datetime/date_trunc.rs | 3 + .../functions/src/datetime/from_unixtime.rs | 3 + .../functions/src/datetime/make_date.rs | 10 + datafusion/functions/src/datetime/to_char.rs | 8 + datafusion/functions/src/datetime/to_date.rs | 9 + .../functions/src/datetime/to_local_time.rs | 3 + .../functions/src/datetime/to_timestamp.rs | 3 + datafusion/functions/src/math/log.rs | 17 +- datafusion/functions/src/math/power.rs | 3 + datafusion/functions/src/math/signum.rs | 3 + datafusion/functions/src/regex/regexpcount.rs | 17 ++ datafusion/functions/src/string/concat.rs | 2 + datafusion/functions/src/string/concat_ws.rs | 3 + datafusion/functions/src/string/contains.rs | 2 + 
datafusion/functions/src/string/lower.rs | 2 + datafusion/functions/src/string/upper.rs | 2 + .../functions/src/unicode/find_in_set.rs | 3 + datafusion/functions/src/utils.rs | 5 +- datafusion/optimizer/src/decorrelate.rs | 40 ++-- .../src/decorrelate_predicate_subquery.rs | 46 +++- datafusion/optimizer/src/optimizer.rs | 24 +- datafusion/optimizer/src/push_down_filter.rs | 35 ++- .../optimizer/src/scalar_subquery_to_join.rs | 9 +- .../simplify_expressions/expr_simplifier.rs | 93 +++++--- .../simplify_expressions/simplify_exprs.rs | 7 +- .../src/simplify_expressions/unwrap_cast.rs | 4 +- datafusion/optimizer/src/utils.rs | 18 +- datafusion/physical-expr/src/analysis.rs | 29 ++- .../physical-expr/src/equivalence/ordering.rs | 5 + .../src/equivalence/projection.rs | 2 + .../src/equivalence/properties/dependency.rs | 3 + datafusion/physical-expr/src/planner.rs | 206 ++++++++++++---- .../physical-expr/src/scalar_function.rs | 54 ++++- datafusion/proto/src/bytes/mod.rs | 14 +- .../proto/src/physical_plan/from_proto.rs | 103 ++++++-- datafusion/proto/src/physical_plan/mod.rs | 149 +++++++++--- .../tests/cases/roundtrip_logical_plan.rs | 9 +- .../tests/cases/roundtrip_physical_plan.rs | 13 +- datafusion/wasmtest/src/lib.rs | 7 +- 105 files changed, 1506 insertions(+), 391 deletions(-) diff --git a/datafusion-examples/examples/composed_extension_codec.rs b/datafusion-examples/examples/composed_extension_codec.rs index 4baefcae507f..dc485540527b 100644 --- a/datafusion-examples/examples/composed_extension_codec.rs +++ b/datafusion-examples/examples/composed_extension_codec.rs @@ -71,8 +71,10 @@ async fn main() { // deserialize proto back to execution plan let runtime = ctx.runtime_env(); + let state = ctx.state(); + let config_options = state.config_options(); let result_exec_plan: Arc = proto - .try_into_physical_plan(&ctx, runtime.deref(), &composed_codec) + .try_into_physical_plan(&ctx, config_options, runtime.deref(), &composed_codec) .expect("from proto"); // assert 
that the original and deserialized execution plans are equal diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index b61a350a5a9c..1cff4849a7aa 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -26,6 +26,7 @@ use datafusion::common::stats::Precision; use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::common::{ColumnStatistics, DFSchema}; use datafusion::common::{ScalarValue, ToDFSchema}; +use datafusion::config::ConfigOptions; use datafusion::error::Result; use datafusion::functions_aggregate::first_last::first_value_udaf; use datafusion::logical_expr::execution_props::ExecutionProps; @@ -35,7 +36,9 @@ use datafusion::logical_expr::simplify::SimplifyContext; use datafusion::logical_expr::{ColumnarValue, ExprFunctionExt, ExprSchemable, Operator}; use datafusion::optimizer::analyzer::type_coercion::TypeCoercionRewriter; use datafusion::optimizer::simplify_expressions::ExprSimplifier; -use datafusion::physical_expr::{analyze, AnalysisContext, ExprBoundaries}; +use datafusion::physical_expr::{ + analyze, create_physical_expr, AnalysisContext, ExprBoundaries, +}; use datafusion::prelude::*; /// This example demonstrates the DataFusion [`Expr`] API. 
@@ -176,7 +179,8 @@ fn simplify_demo() -> Result<()> { // expressions, such as the current time (to evaluate `now()` // correctly) let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(schema); + let config_options = ConfigOptions::default_singleton_arc(); + let context = SimplifyContext::new(&props, config_options).with_schema(schema); let simplifier = ExprSimplifier::new(context); // And then call the simplify_expr function: @@ -191,7 +195,8 @@ fn simplify_demo() -> Result<()> { // here are some other examples of what DataFusion is capable of let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?; - let context = SimplifyContext::new(&props).with_schema(schema.clone()); + let context = + SimplifyContext::new(&props, config_options).with_schema(schema.clone()); let simplifier = ExprSimplifier::new(context); // basic arithmetic simplification @@ -529,8 +534,8 @@ fn type_coercion_demo() -> Result<()> { // Evaluation with an expression that has not been type coerced cannot succeed. let props = ExecutionProps::default(); - let physical_expr = - datafusion::physical_expr::create_physical_expr(&expr, &df_schema, &props)?; + let config_options = ConfigOptions::default_singleton_arc(); + let physical_expr = create_physical_expr(&expr, &df_schema, &props, config_options)?; let e = physical_expr.evaluate(&batch).unwrap_err(); assert!(e .find_root() @@ -543,14 +548,12 @@ fn type_coercion_demo() -> Result<()> { assert!(physical_expr.evaluate(&batch).is_ok()); // 2. Type coercion with `ExprSimplifier::coerce`. 
- let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone())); + let context = SimplifyContext::new(&props, config_options) + .with_schema(Arc::new(df_schema.clone())); let simplifier = ExprSimplifier::new(context); let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?; - let physical_expr = datafusion::physical_expr::create_physical_expr( - &coerced_expr, - &df_schema, - &props, - )?; + let physical_expr = + create_physical_expr(&coerced_expr, &df_schema, &props, config_options)?; assert!(physical_expr.evaluate(&batch).is_ok()); // 3. Type coercion with `TypeCoercionRewriter`. @@ -558,11 +561,8 @@ fn type_coercion_demo() -> Result<()> { .clone() .rewrite(&mut TypeCoercionRewriter::new(&df_schema))? .data; - let physical_expr = datafusion::physical_expr::create_physical_expr( - &coerced_expr, - &df_schema, - &props, - )?; + let physical_expr = + create_physical_expr(&coerced_expr, &df_schema, &props, config_options)?; assert!(physical_expr.evaluate(&batch).is_ok()); // 4. Apply explicit type coercion by manually rewriting the expression @@ -586,11 +586,8 @@ fn type_coercion_demo() -> Result<()> { } })? .data; - let physical_expr = datafusion::physical_expr::create_physical_expr( - &coerced_expr, - &df_schema, - &props, - )?; + let physical_expr = + create_physical_expr(&coerced_expr, &df_schema, &props, config_options)?; assert!(physical_expr.evaluate(&batch).is_ok()); Ok(()) diff --git a/datafusion-examples/examples/planner_api.rs b/datafusion-examples/examples/planner_api.rs index 41110a3e0a9c..4cb204c581e1 100644 --- a/datafusion-examples/examples/planner_api.rs +++ b/datafusion-examples/examples/planner_api.rs @@ -16,6 +16,7 @@ // under the License. 
use datafusion::error::Result; +use datafusion::execution::session_state::SessionStateOptimizerConfig; use datafusion::logical_expr::{LogicalPlan, PlanType}; use datafusion::physical_plan::{displayable, DisplayFormatType}; use datafusion::physical_planner::DefaultPhysicalPlanner; @@ -97,17 +98,19 @@ async fn to_physical_plan_step_by_step_demo( ctx: &SessionContext, ) -> Result<()> { // First analyze the logical plan - let analyzed_logical_plan = ctx.state().analyzer().execute_and_check( + let session_state = ctx.state(); + let analyzed_logical_plan = session_state.analyzer().execute_and_check( input, - ctx.state().config_options(), + session_state.config_options(), |_, _| (), )?; println!("Analyzed logical plan:\n\n{:?}\n\n", analyzed_logical_plan); // Optimize the analyzed logical plan - let optimized_logical_plan = ctx.state().optimizer().optimize( + let session_optimizer_config = SessionStateOptimizerConfig::new(&session_state); + let optimized_logical_plan = session_state.optimizer().optimize( analyzed_logical_plan, - &ctx.state(), + &session_optimizer_config, |_, _| (), )?; println!( @@ -116,10 +119,9 @@ async fn to_physical_plan_step_by_step_demo( ); // Create the physical plan - let physical_plan = ctx - .state() + let physical_plan = session_state .query_planner() - .create_physical_plan(&optimized_logical_plan, &ctx.state()) + .create_physical_plan(&optimized_logical_plan, &session_state) .await?; println!( "Final physical plan:\n\n{}\n\n", @@ -139,7 +141,7 @@ async fn to_physical_plan_step_by_step_demo( // on DefaultPhysicalPlanner. Not all planners will provide this feature. 
let planner = DefaultPhysicalPlanner::default(); let physical_plan = - planner.optimize_physical_plan(physical_plan, &ctx.state(), |_, _| {})?; + planner.optimize_physical_plan(physical_plan, &session_state, |_, _| {})?; println!( "Optimized physical plan:\n\n{}\n\n", displayable(physical_plan.as_ref()) diff --git a/datafusion-examples/examples/pruning.rs b/datafusion-examples/examples/pruning.rs index 4c802bcdbda0..0dc27f943b44 100644 --- a/datafusion-examples/examples/pruning.rs +++ b/datafusion-examples/examples/pruning.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use arrow::array::{ArrayRef, BooleanArray, Int32Array}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::common::config::ConfigOptions; use datafusion::common::{DFSchema, ScalarValue}; use datafusion::execution::context::ExecutionProps; use datafusion::physical_expr::create_physical_expr; @@ -188,7 +189,9 @@ impl PruningStatistics for MyCatalog { fn create_pruning_predicate(expr: Expr, schema: &SchemaRef) -> PruningPredicate { let df_schema = DFSchema::try_from(schema.as_ref().clone()).unwrap(); let props = ExecutionProps::new(); - let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap(); + let config_options = ConfigOptions::default_singleton_arc(); + let physical_expr = + create_physical_expr(&expr, &df_schema, &props, config_options).unwrap(); PruningPredicate::try_new(physical_expr, schema.clone()).unwrap() } diff --git a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/simple_udtf.rs index d2b2d1bf9655..9414ca5885a1 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/simple_udtf.rs @@ -23,6 +23,7 @@ use datafusion::arrow::record_batch::RecordBatch; use datafusion::catalog::Session; use datafusion::catalog::TableFunctionImpl; use datafusion::common::{plan_err, ScalarValue}; +use datafusion::config::ConfigOptions; use datafusion::datasource::memory::MemorySourceConfig; use 
datafusion::datasource::TableProvider; use datafusion::error::Result; @@ -142,7 +143,8 @@ impl TableFunctionImpl for LocalCsvTableFunc { .map(|expr| { // try to simplify the expression, so 1+2 becomes 3, for example let execution_props = ExecutionProps::new(); - let info = SimplifyContext::new(&execution_props); + let config_options = ConfigOptions::default_singleton_arc(); + let info = SimplifyContext::new(&execution_props, config_options); let expr = ExprSimplifier::new(info).simplify(expr.clone())?; if let Expr::Literal(ScalarValue::Int64(Some(limit))) = expr { diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 9ac8423042d3..753a547d260f 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -33,13 +33,15 @@ use arrow::{ datatypes::{DataType, Field, Fields, Schema}, record_batch::RecordBatch, }; -use datafusion_expr::execution_props::ExecutionProps; + use futures::stream::FuturesUnordered; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use log::{debug, trace}; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion_common::{Column, DFSchema, DataFusionError}; +use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::{Expr, Volatility}; use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; @@ -242,6 +244,7 @@ async fn prune_partitions( partitions: Vec, filters: &[Expr], partition_cols: &[(String, DataType)], + config_options: &Arc, ) -> Result> { if filters.is_empty() { return Ok(partitions); @@ -293,7 +296,7 @@ async fn prune_partitions( // Applies `filter` to `batch` returning `None` on error let do_filter = |filter| -> Result { - let expr = create_physical_expr(filter, &df_schema, &props)?; + let expr = create_physical_expr(filter, &df_schema, &props, config_options)?; expr.evaluate(&batch)?.into_array(partitions.len()) }; @@ 
-412,6 +415,7 @@ pub async fn pruned_partition_list<'a>( filters: &'a [Expr], file_extension: &'a str, partition_cols: &'a [(String, DataType)], + config_options: &Arc, ) -> Result>> { // if no partition col => simply list all the files if partition_cols.is_empty() { @@ -436,8 +440,14 @@ pub async fn pruned_partition_list<'a>( .await?; debug!("Listed {} partitions", partitions.len()); - let pruned = - prune_partitions(table_path, partitions, filters, partition_cols).await?; + let pruned = prune_partitions( + table_path, + partitions, + filters, + partition_cols, + config_options, + ) + .await?; debug!("Pruning yielded {} partitions", pruned.len()); @@ -605,6 +615,7 @@ mod tests { &[filter], ".parquet", &[(String::from("mypartition"), DataType::Utf8)], + &Arc::clone(ConfigOptions::default_singleton_arc()), ) .await .expect("partition pruning failed") @@ -630,6 +641,7 @@ mod tests { &[filter], ".parquet", &[(String::from("mypartition"), DataType::Utf8)], + &Arc::clone(ConfigOptions::default_singleton_arc()), ) .await .expect("partition pruning failed") @@ -673,6 +685,7 @@ mod tests { (String::from("part1"), DataType::Utf8), (String::from("part2"), DataType::Utf8), ], + &Arc::clone(ConfigOptions::default_singleton_arc()), ) .await .expect("partition pruning failed") @@ -1016,10 +1029,17 @@ mod tests { .unwrap(); } - (Arc::new(memory), Arc::new(MockSession {})) + ( + Arc::new(memory), + Arc::new(MockSession { + config: SessionConfig::new(), + }), + ) } - struct MockSession {} + struct MockSession { + config: SessionConfig, + } #[async_trait] impl Session for MockSession { @@ -1028,7 +1048,7 @@ mod tests { } fn config(&self) -> &SessionConfig { - unimplemented!() + &self.config } async fn create_physical_plan( diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index b0f17630c910..cfbea21e75ce 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -22,6 +22,7 @@ use std::collections::{BTreeMap, HashMap}; use 
std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; +use std::sync::{Arc, LazyLock}; use crate::error::_config_err; use crate::parsers::CompressionTypeVariant; @@ -724,7 +725,7 @@ config_namespace! { } /// A key value pair, with a corresponding description -#[derive(Debug)] +#[derive(Debug, Hash, PartialEq, Eq)] pub struct ConfigEntry { /// A unique string to identify this config value pub key: String, @@ -777,7 +778,20 @@ impl ConfigField for ConfigOptions { } } +static CONFIG_OPTIONS_SINGLETON: LazyLock> = + LazyLock::new(|| Arc::new(ConfigOptions::default())); + impl ConfigOptions { + /// this is a static singleton to be used for testing only where the default values are sufficient + pub fn default_singleton() -> &'static ConfigOptions { + CONFIG_OPTIONS_SINGLETON.as_ref() + } + + /// this is a static singleton to be used for testing only where the default values are sufficient + pub fn default_singleton_arc() -> &'static Arc { + &CONFIG_OPTIONS_SINGLETON + } + /// Creates a new [`ConfigOptions`] with default values pub fn new() -> Self { Self::default() diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index fd1fd4164da0..1314279090bc 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -144,7 +144,7 @@ zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] async-trait = { workspace = true } -criterion = { workspace = true, features = ["async_tokio"] } +criterion = { workspace = true, features = ["async_tokio", "async_futures"] } ctor = { workspace = true } dashmap = "6.1.0" datafusion-doc = { workspace = true } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 4d7762784d78..6bad5325b930 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -891,6 +891,8 @@ impl TableProvider for ListingTable { } let output_ordering = 
self.try_create_output_ordering()?; + let config_options = Arc::new(session_state.config_options().clone()); + match state .config_options() .execution @@ -924,6 +926,7 @@ impl TableProvider for ListingTable { &expr, &table_df_schema, state.execution_props(), + &config_options, )?; Some(filters) } @@ -1021,6 +1024,7 @@ impl TableProvider for ListingTable { // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here? let session_state = state.as_any().downcast_ref::().unwrap(); + let config_options = Arc::new(state.config_options().clone()); let file_list_stream = pruned_partition_list( session_state, store.as_ref(), @@ -1028,6 +1032,7 @@ impl TableProvider for ListingTable { &[], &self.options.file_extension, &self.options.table_partition_cols, + &config_options, ) .await?; @@ -1094,6 +1099,7 @@ impl ListingTable { return Ok((vec![], Statistics::new_unknown(&self.file_schema))); }; // list files (with partitions) + let config_options = Arc::new(ctx.config_options().clone()); let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { pruned_partition_list( ctx, @@ -1102,6 +1108,7 @@ impl ListingTable { filters, &self.options.file_extension, &self.options.table_partition_cols, + &config_options, ) })) .await?; diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index d96944fa7a69..452992c45a14 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -236,6 +236,8 @@ impl TableProvider for MemTable { // add sort information if present let sort_order = self.sort_order.lock(); + let config_options = Arc::new(state.config_options().clone()); + if !sort_order.is_empty() { let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?; @@ -246,6 +248,7 @@ impl TableProvider for MemTable { sort_exprs, &df_schema, state.execution_props(), + &config_options, ) }) .collect::>>()?; diff --git 
a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index ad0993ed43ca..80cb30828d81 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1829,6 +1829,7 @@ mod tests { use crate::execution::session_state::SessionStateBuilder; use crate::physical_planner::PhysicalPlanner; use async_trait::async_trait; + use datafusion_common::config::ConfigOptions; use datafusion_expr::planner::TypePlanner; use sqlparser::ast; use tempfile::TempDir; @@ -2281,6 +2282,7 @@ mod tests { _expr: &Expr, _input_dfschema: &DFSchema, _session_state: &SessionState, + _config_options: &Arc, ) -> Result> { unimplemented!() } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index f4b0fd0c125f..592e24188c58 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -560,7 +560,7 @@ impl SessionState { // analyze & capture output of each rule let analyzer_result = self.analyzer.execute_and_check( e.plan.as_ref().clone(), - self.options(), + self.config_options(), |analyzed_plan, analyzer| { let analyzer_name = analyzer.name().to_string(); let plan_type = PlanType::AnalyzedLogicalPlan { analyzer_name }; @@ -590,9 +590,10 @@ impl SessionState { .push(analyzed_plan.to_stringified(PlanType::FinalAnalyzedLogicalPlan)); // optimize the child plan, capturing the output of each optimizer + let session_optimizer_config = SessionStateOptimizerConfig::new(self); let optimized_plan = self.optimizer.optimize( analyzed_plan, - self, + &session_optimizer_config, |optimized_plan, optimizer| { let optimizer_name = optimizer.name().to_string(); let plan_type = PlanType::OptimizedLogicalPlan { optimizer_name }; @@ -620,10 +621,12 @@ impl SessionState { } else { let analyzed_plan = self.analyzer.execute_and_check( plan.clone(), - self.options(), + self.config_options(), |_, _| {}, )?; - 
self.optimizer.optimize(analyzed_plan, self, |_, _| {}) + let session_optimizer_config = SessionStateOptimizerConfig::new(self); + self.optimizer + .optimize(analyzed_plan, &session_optimizer_config, |_, _| {}) } } @@ -666,19 +669,24 @@ impl SessionState { expr: Expr, df_schema: &DFSchema, ) -> datafusion_common::Result> { - let simplifier = - ExprSimplifier::new(SessionSimplifyProvider::new(self, df_schema)); + let config_options = Arc::new(self.config_options().clone()); + let simplifier = ExprSimplifier::new(SessionSimplifyProvider::new( + self, + df_schema, + &config_options, + )); // apply type coercion here to ensure types match let mut expr = simplifier.coerce(expr, df_schema)?; // rewrite Exprs to functions if necessary - let config_options = self.config_options(); for rewrite in self.analyzer.function_rewrites() { expr = expr - .transform_up(|expr| rewrite.rewrite(expr, df_schema, config_options))? + .transform_up(|expr| { + rewrite.rewrite(expr, df_schema, self.config_options()) + })? 
.data; } - create_physical_expr(&expr, df_schema, self.execution_props()) + create_physical_expr(&expr, df_schema, self.execution_props(), &config_options) } /// Return the session ID @@ -1853,7 +1861,26 @@ impl FunctionRegistry for SessionState { } } -impl OptimizerConfig for SessionState { +/// An [`OptimizerConfig`] for use with [`SessionState`] +pub struct SessionStateOptimizerConfig<'a> { + execution_props: &'a ExecutionProps, + config_options: Arc, + function_registry: Option<&'a dyn FunctionRegistry>, +} + +impl<'a> SessionStateOptimizerConfig<'a> { + /// Create optimizer config + pub fn new(session_state: &'a SessionState) -> Self { + let config_options = Arc::new(session_state.config.options().clone()); + Self { + execution_props: &session_state.execution_props, + config_options, + function_registry: Some(session_state), + } + } +} + +impl OptimizerConfig for SessionStateOptimizerConfig<'_> { fn query_execution_start_time(&self) -> DateTime { self.execution_props.query_execution_start_time } @@ -1862,12 +1889,12 @@ impl OptimizerConfig for SessionState { &self.execution_props.alias_generator } - fn options(&self) -> &ConfigOptions { - self.config_options() + fn options(&self) -> &Arc { + &self.config_options } fn function_registry(&self) -> Option<&dyn FunctionRegistry> { - Some(self) + self.function_registry } } @@ -1909,11 +1936,20 @@ impl QueryPlanner for DefaultQueryPlanner { struct SessionSimplifyProvider<'a> { state: &'a SessionState, df_schema: &'a DFSchema, + config_options: &'a Arc, } impl<'a> SessionSimplifyProvider<'a> { - fn new(state: &'a SessionState, df_schema: &'a DFSchema) -> Self { - Self { state, df_schema } + fn new( + state: &'a SessionState, + df_schema: &'a DFSchema, + config_options: &'a Arc, + ) -> Self { + Self { + state, + df_schema, + config_options, + } } } @@ -1930,6 +1966,10 @@ impl SimplifyInfo for SessionSimplifyProvider<'_> { self.state.execution_props() } + fn config_options(&self) -> &Arc { + self.config_options + } + fn 
get_data_type(&self, expr: &Expr) -> datafusion_common::Result { expr.get_type(self.df_schema) } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6aff9280ffad..5cfe2a0e44e7 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -93,6 +93,7 @@ use datafusion_physical_plan::DisplayFormatType; use crate::schema_equivalence::schema_satisfied_by; use async_trait::async_trait; +use datafusion_common::config::ConfigOptions; use futures::{StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; use log::{debug, trace}; @@ -121,6 +122,7 @@ pub trait PhysicalPlanner: Send + Sync { expr: &Expr, input_dfschema: &DFSchema, session_state: &SessionState, + config_options: &Arc, ) -> Result>; } @@ -202,8 +204,14 @@ impl PhysicalPlanner for DefaultPhysicalPlanner { expr: &Expr, input_dfschema: &DFSchema, session_state: &SessionState, + config_options: &Arc, ) -> Result> { - create_physical_expr(expr, input_dfschema, session_state.execution_props()) + create_physical_expr( + expr, + input_dfschema, + session_state.execution_props(), + config_options, + ) } } @@ -328,11 +336,18 @@ impl DefaultPhysicalPlanner { // all converge down to the root node, which can only be processed by a // single task. let max_concurrency = planning_concurrency.min(flat_tree_leaf_indices.len()); + // clone the config_options once per call + let config_options = Arc::new(session_state.config_options().clone()); // Spawning tasks which will traverse leaf up to the root. 
- let tasks = flat_tree_leaf_indices - .into_iter() - .map(|index| self.task_helper(index, Arc::clone(&flat_tree), session_state)); + let tasks = flat_tree_leaf_indices.into_iter().map(|index| { + self.task_helper( + index, + Arc::clone(&flat_tree), + session_state, + &config_options, + ) + }); let mut outputs = futures::stream::iter(tasks) .buffer_unordered(max_concurrency) .try_collect::>() @@ -360,6 +375,7 @@ impl DefaultPhysicalPlanner { leaf_starter_index: usize, flat_tree: Arc>>, session_state: &'a SessionState, + config_options: &'a Arc, ) -> Result>> { // We always start with a leaf, so can ignore status and pass empty children let mut node = flat_tree.get(leaf_starter_index).ok_or_else(|| { @@ -371,6 +387,7 @@ impl DefaultPhysicalPlanner { .map_logical_node_to_physical( node.node, session_state, + config_options, ChildrenContainer::None, ) .await?; @@ -388,6 +405,7 @@ impl DefaultPhysicalPlanner { .map_logical_node_to_physical( node.node, session_state, + config_options, ChildrenContainer::One(plan), ) .await?; @@ -424,7 +442,12 @@ impl DefaultPhysicalPlanner { let children = children.into_iter().map(|epc| epc.plan).collect(); let children = ChildrenContainer::Multiple(children); plan = self - .map_logical_node_to_physical(node.node, session_state, children) + .map_logical_node_to_physical( + node.node, + session_state, + config_options, + children, + ) .await?; } } @@ -439,6 +462,7 @@ impl DefaultPhysicalPlanner { &self, node: &LogicalPlan, session_state: &SessionState, + config_options: &Arc, children: ChildrenContainer, ) -> Result> { let exec_node: Arc = match node { @@ -466,7 +490,12 @@ impl DefaultPhysicalPlanner { .map(|row| { row.iter() .map(|expr| { - self.create_physical_expr(expr, schema, session_state) + self.create_physical_expr( + expr, + schema, + session_state, + config_options, + ) }) .collect::>>>() }) @@ -519,7 +548,7 @@ impl DefaultPhysicalPlanner { let keep_partition_by_columns = match source_option_tuples 
.get("execution.keep_partition_by_columns") .map(|v| v.trim()) { - None => session_state.config().options().execution.keep_partition_by_columns, + None => config_options.execution.keep_partition_by_columns, Some("true") => true, Some("false") => false, Some(value) => @@ -616,6 +645,7 @@ impl DefaultPhysicalPlanner { e, logical_schema, session_state.execution_props(), + config_options, ) }) .collect::>>()?; @@ -648,14 +678,15 @@ impl DefaultPhysicalPlanner { aggr_expr, .. }) => { - let options = session_state.config().options(); // Initially need to perform the aggregate and then merge the partitions let input_exec = children.one()?; let physical_input_schema = input_exec.schema(); let logical_input_schema = input.as_ref().schema(); let physical_input_schema_from_logical = logical_input_schema.inner(); - if !options.execution.skip_physical_aggregate_schema_check + if !config_options + .execution + .skip_physical_aggregate_schema_check && !schema_satisfied_by( physical_input_schema_from_logical, &physical_input_schema, @@ -703,6 +734,7 @@ impl DefaultPhysicalPlanner { logical_input_schema, &physical_input_schema, session_state, + config_options, )?; let agg_filter = aggr_expr @@ -713,6 +745,7 @@ impl DefaultPhysicalPlanner { logical_input_schema, &physical_input_schema, session_state.execution_props(), + config_options, ) }) .collect::>>()?; @@ -763,6 +796,7 @@ impl DefaultPhysicalPlanner { LogicalPlan::Projection(Projection { input, expr, .. 
}) => self .create_project_physical_exec( session_state, + config_options, children.one()?, input, expr, @@ -773,8 +807,12 @@ impl DefaultPhysicalPlanner { let physical_input = children.one()?; let input_dfschema = input.schema(); - let runtime_expr = - self.create_physical_expr(predicate, input_dfschema, session_state)?; + let runtime_expr = self.create_physical_expr( + predicate, + input_dfschema, + session_state, + config_options, + )?; let selectivity = session_state .config() .options() @@ -801,6 +839,7 @@ impl DefaultPhysicalPlanner { e, input_dfschema, session_state, + config_options, ) }) .collect::>>()?; @@ -826,6 +865,7 @@ impl DefaultPhysicalPlanner { expr, input_dfschema, session_state.execution_props(), + config_options, )?; let new_sort = SortExec::new(sort_expr, physical_input).with_fetch(*fetch); @@ -943,6 +983,7 @@ impl DefaultPhysicalPlanner { LogicalPlan::Projection(Projection { input, expr, .. }), ) => self.create_project_physical_exec( session_state, + config_options, physical_left, input, expr, @@ -956,6 +997,7 @@ impl DefaultPhysicalPlanner { LogicalPlan::Projection(Projection { input, expr, .. 
}), ) => self.create_project_physical_exec( session_state, + config_options, physical_right, input, expr, @@ -1008,9 +1050,18 @@ impl DefaultPhysicalPlanner { let join_on = keys .iter() .map(|(l, r)| { - let l = create_physical_expr(l, left_df_schema, execution_props)?; - let r = - create_physical_expr(r, right_df_schema, execution_props)?; + let l = create_physical_expr( + l, + left_df_schema, + execution_props, + config_options, + )?; + let r = create_physical_expr( + r, + right_df_schema, + execution_props, + config_options, + )?; Ok((l, r)) }) .collect::>()?; @@ -1082,6 +1133,7 @@ impl DefaultPhysicalPlanner { expr, &filter_df_schema, session_state.execution_props(), + config_options, )?; let column_indices = join_utils::JoinFilter::build_column_indices( left_field_indices, @@ -1097,8 +1149,7 @@ impl DefaultPhysicalPlanner { _ => None, }; - let prefer_hash_join = - session_state.config_options().optimizer.prefer_hash_join; + let prefer_hash_join = config_options.optimizer.prefer_hash_join; let join: Arc = if join_on.is_empty() { if join_filter.is_none() && matches!(join_type, JoinType::Inner) { @@ -1168,7 +1219,13 @@ impl DefaultPhysicalPlanner { // If plan was mutated previously then need to create the ExecutionPlan // for the new Projection that was applied on top. if let Some((input, expr)) = new_project { - self.create_project_physical_exec(session_state, join, input, expr)? + self.create_project_physical_exec( + session_state, + config_options, + join, + input, + expr, + )? 
} else { join } @@ -1269,6 +1326,7 @@ impl DefaultPhysicalPlanner { input_dfschema: &DFSchema, input_schema: &Schema, session_state: &SessionState, + config_options: &Arc, ) -> Result { if group_expr.len() == 1 { match &group_expr[0] { @@ -1278,6 +1336,7 @@ impl DefaultPhysicalPlanner { input_dfschema, input_schema, session_state, + config_options, ) } Expr::GroupingSet(GroupingSet::Cube(exprs)) => create_cube_physical_expr( @@ -1285,6 +1344,7 @@ impl DefaultPhysicalPlanner { input_dfschema, input_schema, session_state, + config_options, ), Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { create_rollup_physical_expr( @@ -1292,10 +1352,16 @@ impl DefaultPhysicalPlanner { input_dfschema, input_schema, session_state, + config_options, ) } expr => Ok(PhysicalGroupBy::new_single(vec![tuple_err(( - self.create_physical_expr(expr, input_dfschema, session_state), + self.create_physical_expr( + expr, + input_dfschema, + session_state, + config_options, + ), physical_name(expr), ))?])), } @@ -1305,7 +1371,12 @@ impl DefaultPhysicalPlanner { .iter() .map(|e| { tuple_err(( - self.create_physical_expr(e, input_dfschema, session_state), + self.create_physical_expr( + e, + input_dfschema, + session_state, + config_options, + ), physical_name(e), )) }) @@ -1330,6 +1401,7 @@ fn merge_grouping_set_physical_expr( input_dfschema: &DFSchema, input_schema: &Schema, session_state: &SessionState, + config_options: &Arc, ) -> Result { let num_groups = grouping_sets.len(); let mut all_exprs: Vec = vec![]; @@ -1344,6 +1416,7 @@ fn merge_grouping_set_physical_expr( expr, input_dfschema, session_state, + config_options, )?); null_exprs.push(get_null_physical_expr_pair( @@ -1351,6 +1424,7 @@ fn merge_grouping_set_physical_expr( input_dfschema, input_schema, session_state, + config_options, )?); } } @@ -1380,6 +1454,7 @@ fn create_cube_physical_expr( input_dfschema: &DFSchema, input_schema: &Schema, session_state: &SessionState, + config_options: &Arc, ) -> Result { let num_of_exprs = 
exprs.len(); let num_groups = num_of_exprs * num_of_exprs; @@ -1395,9 +1470,15 @@ fn create_cube_physical_expr( input_dfschema, input_schema, session_state, + config_options, )?); - all_exprs.push(get_physical_expr_pair(expr, input_dfschema, session_state)?) + all_exprs.push(get_physical_expr_pair( + expr, + input_dfschema, + session_state, + config_options, + )?) } let mut groups: Vec> = Vec::with_capacity(num_groups); @@ -1422,6 +1503,7 @@ fn create_rollup_physical_expr( input_dfschema: &DFSchema, input_schema: &Schema, session_state: &SessionState, + config_options: &Arc, ) -> Result { let num_of_exprs = exprs.len(); @@ -1438,9 +1520,15 @@ fn create_rollup_physical_expr( input_dfschema, input_schema, session_state, + config_options, )?); - all_exprs.push(get_physical_expr_pair(expr, input_dfschema, session_state)?) + all_exprs.push(get_physical_expr_pair( + expr, + input_dfschema, + session_state, + config_options, + )?) } for total in 0..=num_of_exprs { @@ -1466,9 +1554,14 @@ fn get_null_physical_expr_pair( input_dfschema: &DFSchema, input_schema: &Schema, session_state: &SessionState, + config_options: &Arc, ) -> Result<(Arc, String)> { - let physical_expr = - create_physical_expr(expr, input_dfschema, session_state.execution_props())?; + let physical_expr = create_physical_expr( + expr, + input_dfschema, + session_state.execution_props(), + config_options, + )?; let physical_name = physical_name(&expr.clone())?; let data_type = physical_expr.data_type(input_schema)?; @@ -1482,9 +1575,14 @@ fn get_physical_expr_pair( expr: &Expr, input_dfschema: &DFSchema, session_state: &SessionState, + config_options: &Arc, ) -> Result<(Arc, String)> { - let physical_expr = - create_physical_expr(expr, input_dfschema, session_state.execution_props())?; + let physical_expr = create_physical_expr( + expr, + input_dfschema, + session_state.execution_props(), + config_options, + )?; let physical_name = physical_name(expr)?; Ok((physical_expr, physical_name)) } @@ -1515,6 +1613,7 
@@ pub fn create_window_expr_with_name( name: impl Into, logical_schema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result> { let name = name.into(); let physical_schema: &Schema = &logical_schema.into(); @@ -1530,12 +1629,24 @@ pub fn create_window_expr_with_name( null_treatment, }, }) => { - let physical_args = - create_physical_exprs(args, logical_schema, execution_props)?; - let partition_by = - create_physical_exprs(partition_by, logical_schema, execution_props)?; - let order_by = - create_physical_sort_exprs(order_by, logical_schema, execution_props)?; + let physical_args = create_physical_exprs( + args, + logical_schema, + execution_props, + config_options, + )?; + let partition_by = create_physical_exprs( + partition_by, + logical_schema, + execution_props, + config_options, + )?; + let order_by = create_physical_sort_exprs( + order_by, + logical_schema, + execution_props, + config_options, + )?; if !is_window_frame_bound_valid(window_frame) { return plan_err!( @@ -1567,13 +1678,14 @@ pub fn create_window_expr( e: &Expr, logical_schema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result> { // unpack aliased logical expressions, e.g. "sum(col) over () as total" let (name, e) = match e { Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()), _ => (e.schema_name().to_string(), e), }; - create_window_expr_with_name(e, name, logical_schema, execution_props) + create_window_expr_with_name(e, name, logical_schema, execution_props, config_options) } type AggregateExprWithOptionalArgs = ( @@ -1591,6 +1703,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( logical_input_schema: &DFSchema, physical_input_schema: &Schema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { match e { Expr::AggregateFunction(AggregateFunction { @@ -1610,13 +1723,18 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( physical_name(e)? 
}; - let physical_args = - create_physical_exprs(args, logical_input_schema, execution_props)?; + let physical_args = create_physical_exprs( + args, + logical_input_schema, + execution_props, + config_options, + )?; let filter = match filter { Some(e) => Some(create_physical_expr( e, logical_input_schema, execution_props, + config_options, )?), None => None, }; @@ -1630,6 +1748,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter( exprs, logical_input_schema, execution_props, + config_options, )?), None => None, }; @@ -1662,6 +1781,7 @@ pub fn create_aggregate_expr_and_maybe_filter( logical_input_schema: &DFSchema, physical_input_schema: &Schema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" let (name, e) = match e { @@ -1676,6 +1796,7 @@ pub fn create_aggregate_expr_and_maybe_filter( logical_input_schema, physical_input_schema, execution_props, + config_options, ) } @@ -1684,6 +1805,7 @@ pub fn create_physical_sort_expr( e: &SortExpr, input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { let SortExpr { expr, @@ -1691,7 +1813,12 @@ pub fn create_physical_sort_expr( nulls_first, } = e; Ok(PhysicalSortExpr { - expr: create_physical_expr(expr, input_dfschema, execution_props)?, + expr: create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?, options: SortOptions { descending: !asc, nulls_first: *nulls_first, @@ -1704,10 +1831,18 @@ pub fn create_physical_sort_exprs( exprs: &[SortExpr], input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { exprs .iter() - .map(|expr| create_physical_sort_expr(expr, input_dfschema, execution_props)) + .map(|expr| { + create_physical_sort_expr( + expr, + input_dfschema, + execution_props, + config_options, + ) + }) .collect::>() } @@ -1990,6 +2125,7 @@ impl DefaultPhysicalPlanner { fn 
create_project_physical_exec( &self, session_state: &SessionState, + config_options: &Arc, input_exec: Arc, input: &Arc, expr: &[Expr], @@ -2027,7 +2163,12 @@ impl DefaultPhysicalPlanner { }; tuple_err(( - self.create_physical_expr(e, input_schema, session_state), + self.create_physical_expr( + e, + input_schema, + session_state, + config_options, + ), physical_name, )) }) @@ -2210,12 +2351,14 @@ mod tests { let physical_input_schema = physical_input_schema.as_ref(); let logical_input_schema = logical_plan.schema(); let session_state = make_session_state(); + let config_options = ConfigOptions::default_singleton_arc(); let cube = create_cube_physical_expr( &exprs, logical_input_schema, physical_input_schema, &session_state, + config_options, ); let expected = r#"Ok(PhysicalGroupBy { expr: [(Column { name: "c1", index: 0 }, "c1"), (Column { name: "c2", index: 1 }, "c2"), (Column { name: "c3", index: 2 }, "c3")], null_expr: [(Literal { value: Utf8(NULL) }, "c1"), (Literal { value: Int64(NULL) }, "c2"), (Literal { value: Int64(NULL) }, "c3")], groups: [[false, false, false], [true, false, false], [false, true, false], [false, false, true], [true, true, false], [true, false, true], [false, true, true], [true, true, true]] })"#; @@ -2237,12 +2380,14 @@ mod tests { let physical_input_schema = physical_input_schema.as_ref(); let logical_input_schema = logical_plan.schema(); let session_state = make_session_state(); + let config_options = ConfigOptions::default_singleton_arc(); let rollup = create_rollup_physical_expr( &exprs, logical_input_schema, physical_input_schema, &session_state, + config_options, ); let expected = r#"Ok(PhysicalGroupBy { expr: [(Column { name: "c1", index: 0 }, "c1"), (Column { name: "c2", index: 1 }, "c2"), (Column { name: "c3", index: 2 }, "c3")], null_expr: [(Literal { value: Utf8(NULL) }, "c1"), (Literal { value: Int64(NULL) }, "c2"), (Literal { value: Int64(NULL) }, "c3")], groups: [[true, true, true], [false, true, true], [false, false, 
true], [false, false, false]] })"#; @@ -2258,11 +2403,13 @@ mod tests { let dfschema = DFSchema::try_from(schema.clone())?; let planner = DefaultPhysicalPlanner::default(); + let config_options = ConfigOptions::default_singleton_arc(); let expr = planner.create_physical_expr( &col("a").not(), &dfschema, &make_session_state(), + config_options, )?; let expected = expressions::not(expressions::col("a", &schema)?)?; diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index c0be13baf21a..2aa1df00c6f8 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -175,12 +175,18 @@ impl TestParquetFile { // run coercion on the filters to coerce types etc. let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(Arc::clone(&df_schema)); + let config_options = ConfigOptions::default_singleton_arc(); + let context = SimplifyContext::new(&props, config_options) + .with_schema(Arc::clone(&df_schema)); if let Some(filter) = maybe_filter { let simplifier = ExprSimplifier::new(context); let filter = simplifier.coerce(filter, &df_schema).unwrap(); - let physical_filter_expr = - create_physical_expr(&filter, &df_schema, &ExecutionProps::default())?; + let physical_filter_expr = create_physical_expr( + &filter, + &df_schema, + &ExecutionProps::default(), + &Arc::clone(ConfigOptions::default_singleton_arc()), + )?; let source = Arc::new(ParquetSource::new(parquet_options).with_predicate( Arc::clone(&scan_config.file_schema), diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index aef10379da07..c8f61cf0f096 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -22,6 +22,7 @@ use arrow::array::{ use arrow::datatypes::{DataType, Field}; use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use datafusion::prelude::*; +use datafusion_common::config::ConfigOptions; 
use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; @@ -404,7 +405,8 @@ fn create_simplified_expr_test(expr: Expr, expected_expr: &str) { // Simplify the expression first let props = ExecutionProps::new(); let simplify_context = - SimplifyContext::new(&props).with_schema(df_schema.clone().into()); + SimplifyContext::new(&props, ConfigOptions::default_singleton_arc()) + .with_schema(df_schema.clone().into()); let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10); let simplified = simplifier.simplify(expr).unwrap(); create_expr_test(simplified, expected_expr); diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 7bb21725ef40..a3ed5e4a634c 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -23,6 +23,7 @@ use arrow::datatypes::{DataType, Field, Schema}; use chrono::{DateTime, TimeZone, Utc}; use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; use datafusion_common::cast::as_int32_array; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_common::{DFSchemaRef, ToDFSchema}; use datafusion_expr::expr::ScalarFunction; @@ -50,6 +51,9 @@ struct MyInfo { /// Execution specific details needed for constant evaluation such /// as the current time for `now()` and [VariableProviders] execution_props: ExecutionProps, + + /// config options needed for scalar function evaluation + config_options: Arc, } impl SimplifyInfo for MyInfo { @@ -68,6 +72,10 @@ impl SimplifyInfo for MyInfo { &self.execution_props } + fn config_options(&self) -> &Arc { + &self.config_options + } + fn get_data_type(&self, expr: &Expr) -> Result { expr.get_type(self.schema.as_ref()) } @@ -78,6 +86,7 @@ impl From for MyInfo { Self { schema, execution_props: ExecutionProps::new(), + 
config_options: Arc::clone(ConfigOptions::default_singleton_arc()), } } } @@ -132,10 +141,11 @@ fn test_evaluate_with_start_time( ) { let execution_props = ExecutionProps::new().with_query_execution_start_time(*date_time); - + let config_options = Arc::clone(ConfigOptions::default_singleton_arc()); let info: MyInfo = MyInfo { schema: schema(), execution_props, + config_options, }; let simplifier = ExprSimplifier::new(info); let simplified_expr = simplifier @@ -522,9 +532,11 @@ fn expr_test_schema() -> DFSchemaRef { } fn test_simplify(input_expr: Expr, expected_expr: Expr) { + let config_options = Arc::clone(ConfigOptions::default_singleton_arc()); let info: MyInfo = MyInfo { schema: expr_test_schema(), execution_props: ExecutionProps::new(), + config_options, }; let simplifier = ExprSimplifier::new(info); let simplified_expr = simplifier @@ -541,9 +553,11 @@ fn test_simplify_with_cycle_count( expected_expr: Expr, expected_count: u32, ) { + let config_options = Arc::clone(ConfigOptions::default_singleton_arc()); let info: MyInfo = MyInfo { schema: expr_test_schema(), execution_props: ExecutionProps::new(), + config_options, }; let simplifier = ExprSimplifier::new(info); let (simplified_expr, count) = simplifier diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs index 769deef1187d..9947a383d369 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs @@ -21,6 +21,7 @@ use crate::fuzz_cases::equivalence::utils::{ is_table_same_after_sort, TestScalarUDF, }; use arrow::compute::SortOptions; +use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::expressions::{col, BinaryExpr}; @@ -110,6 +111,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + 
&Arc::clone(ConfigOptions::default_singleton_arc()), )?); let a_plus_b = Arc::new(BinaryExpr::new( col("a", &test_schema)?, diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index a3fa1157b38f..e92a9ef0beb7 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -20,6 +20,7 @@ use crate::fuzz_cases::equivalence::utils::{ is_table_same_after_sort, TestScalarUDF, }; use arrow::compute::SortOptions; +use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::equivalence::ProjectionMapping; @@ -49,6 +50,7 @@ fn project_orderings_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + ConfigOptions::default_singleton_arc(), )?); // a + b let a_plus_b = Arc::new(BinaryExpr::new( @@ -126,6 +128,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + ConfigOptions::default_singleton_arc(), )?) 
as PhysicalExprRef; // a + b let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs index 593e1c6c2dca..93c2ff3d8550 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs @@ -19,6 +19,7 @@ use crate::fuzz_cases::equivalence::utils::{ create_random_schema, generate_table_for_eq_properties, is_table_same_after_sort, TestScalarUDF, }; +use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr::expressions::{col, BinaryExpr}; @@ -47,6 +48,7 @@ fn test_find_longest_permutation_random() -> Result<()> { Arc::clone(&test_fun), vec![col_a], &test_schema, + &Arc::clone(ConfigOptions::default_singleton_arc()), )?) as PhysicalExprRef; let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index fe96a2eb5e71..26357c5c4a9d 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -31,7 +31,6 @@ use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; use datafusion_common::{ScalarValue, ToDFSchema}; -use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::{col, lit, Expr}; use datafusion_physical_expr::create_physical_expr; @@ -71,8 +70,11 @@ async fn get_parquet_exec(state: &SessionState, filter: Expr) -> DataSourceExec }; let df_schema = schema.clone().to_dfschema().unwrap(); - let execution_props = ExecutionProps::new(); - let predicate = create_physical_expr(&filter, &df_schema, &execution_props).unwrap(); + let execution_props = state.execution_props(); + let config_options = Arc::new(state.config_options().clone()); + let predicate = + 
create_physical_expr(&filter, &df_schema, execution_props, &config_options) + .unwrap(); let source = Arc::new( ParquetSource::default() diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs index a79d743cb253..aedfae28a0f4 100644 --- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs @@ -67,8 +67,8 @@ async fn assert_count_optim_success( let task_ctx = Arc::new(TaskContext::default()); let plan: Arc = Arc::new(plan); - let config = ConfigOptions::new(); - let optimized = AggregateStatistics::new().optimize(Arc::clone(&plan), &config)?; + let optimized = AggregateStatistics::new() + .optimize(Arc::clone(&plan), ConfigOptions::default_singleton())?; // A ProjectionExec is a sign that the count optimization was applied assert!(optimized.as_any().is::()); @@ -264,8 +264,8 @@ async fn test_count_inexact_stat() -> Result<()> { Arc::clone(&schema), )?; - let conf = ConfigOptions::new(); - let optimized = AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; + let optimized = AggregateStatistics::new() + .optimize(Arc::new(final_agg), ConfigOptions::default_singleton())?; // check that the original ExecutionPlan was not replaced assert!(optimized.as_any().is::()); @@ -308,8 +308,8 @@ async fn test_count_with_nulls_inexact_stat() -> Result<()> { Arc::clone(&schema), )?; - let conf = ConfigOptions::new(); - let optimized = AggregateStatistics::new().optimize(Arc::new(final_agg), &conf)?; + let optimized = AggregateStatistics::new() + .optimize(Arc::new(final_agg), ConfigOptions::default_singleton())?; // check that the original ExecutionPlan was not replaced assert!(optimized.as_any().is::()); diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs index 568be0d18f24..64c9e1c36135 100644 
--- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs @@ -48,8 +48,7 @@ macro_rules! assert_optimized { // run optimizer let optimizer = CombinePartialFinalAggregate {}; - let config = ConfigOptions::new(); - let optimized = optimizer.optimize($PLAN, &config)?; + let optimized = optimizer.optimize($PLAN, ConfigOptions::default_singleton())?; // Now format correctly let plan = displayable(optimized.as_ref()).indent(true).to_string(); let actual_lines = trim_plan_display(&plan); diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index bb77192e05b8..5bf396e3b38a 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -1863,7 +1863,6 @@ async fn test_multiple_sort_window_exec() -> Result<()> { // EnforceDistribution may invalidate ordering invariant. 
async fn test_commutativity() -> Result<()> { let schema = create_test_schema()?; - let config = ConfigOptions::new(); let memory_exec = memory_exec(&schema); let sort_exprs = LexOrdering::new(vec![sort_expr("nullable_col", &schema)]); @@ -1890,7 +1889,7 @@ async fn test_commutativity() -> Result<()> { Arc::new(EnforceSorting::new()) as Arc, ]; for rule in rules { - plan = rule.optimize(plan, &config)?; + plan = rule.optimize(plan, ConfigOptions::default_singleton())?; } let first_plan = plan.clone(); @@ -1901,7 +1900,7 @@ async fn test_commutativity() -> Result<()> { Arc::new(EnforceSorting::new()) as Arc, ]; for rule in rules { - plan = rule.optimize(plan, &config)?; + plan = rule.optimize(plan, ConfigOptions::default_singleton())?; } let second_plan = plan.clone(); diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs index d3b6ec700bee..423980287a8f 100644 --- a/datafusion/core/tests/physical_optimizer/join_selection.rs +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -228,7 +228,7 @@ async fn test_join_with_swap() { ); let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) + .optimize(join, ConfigOptions::default_singleton()) .unwrap(); let swapping_projection = optimized_join @@ -282,7 +282,7 @@ async fn test_left_join_no_swap() { ); let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) + .optimize(join, ConfigOptions::default_singleton()) .unwrap(); let swapped_join = optimized_join @@ -324,7 +324,7 @@ async fn test_join_with_swap_semi() { let original_schema = join.schema(); let optimized_join = JoinSelection::new() - .optimize(Arc::new(join), &ConfigOptions::new()) + .optimize(Arc::new(join), ConfigOptions::default_singleton()) .unwrap(); let swapped_join = optimized_join @@ -354,7 +354,7 @@ macro_rules! 
assert_optimized { let plan = Arc::new($PLAN); let optimized = JoinSelection::new() - .optimize(plan.clone(), &ConfigOptions::new()) + .optimize(plan.clone(), ConfigOptions::default_singleton()) .unwrap(); let plan_string = displayable(optimized.as_ref()).indent(true).to_string(); @@ -446,7 +446,7 @@ async fn test_join_no_swap() { ); let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) + .optimize(join, ConfigOptions::default_singleton()) .unwrap(); let swapped_join = optimized_join @@ -487,7 +487,7 @@ async fn test_nl_join_with_swap(join_type: JoinType) { ); let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) + .optimize(join, ConfigOptions::default_singleton()) .unwrap(); let swapping_projection = optimized_join @@ -558,7 +558,7 @@ async fn test_nl_join_with_swap_no_proj(join_type: JoinType) { let optimized_join = JoinSelection::new() .optimize( Arc::::clone(&join), - &ConfigOptions::new(), + ConfigOptions::default_singleton(), ) .unwrap(); @@ -809,7 +809,7 @@ fn check_join_partition_mode( ); let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) + .optimize(join, ConfigOptions::default_singleton()) .unwrap(); if !is_swapped { @@ -1445,7 +1445,8 @@ async fn test_join_with_maybe_swap_unbounded_case(t: TestCase) -> Result<()> { false, )?) 
as _; - let optimized_join_plan = hash_join_swap_subrule(join, &ConfigOptions::new())?; + let optimized_join_plan = + hash_join_swap_subrule(join, ConfigOptions::default_singleton())?; // If swap did happen let projection_added = optimized_join_plan.as_any().is::(); diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs index dd2c1960a658..4fac0559bb6e 100644 --- a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -168,8 +168,8 @@ fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero() -> ]; assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" @@ -193,8 +193,8 @@ fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_li ]; assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "GlobalLimitExec: skip=2, fetch=5", @@ -229,8 +229,8 @@ fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limi ]; assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "CoalescePartitionsExec: fetch=5", @@ -261,8 +261,8 @@ fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - 
LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", @@ -294,8 +294,8 @@ fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batc assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", @@ -337,8 +337,8 @@ fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "SortPreservingMergeExec: [c1@0 ASC], fetch=5", @@ -373,8 +373,8 @@ fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions() -> R ]; assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = [ "CoalescePartitionsExec: fetch=5", @@ -403,8 +403,8 @@ fn merges_local_limit_with_local_limit() -> Result<()> { assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(parent_local_limit, ConfigOptions::default_singleton())?; let expected = ["GlobalLimitExec: skip=0, fetch=10", " EmptyExec"]; assert_eq!(get_plan_string(&after_optimize), expected); @@ -428,8 +428,8 @@ fn merges_global_limit_with_global_limit() -> Result<()> { 
assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(parent_global_limit, ConfigOptions::default_singleton())?; let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; assert_eq!(get_plan_string(&after_optimize), expected); @@ -453,8 +453,8 @@ fn merges_global_limit_with_local_limit() -> Result<()> { assert_eq!(initial, expected_initial); - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + let after_optimize = LimitPushdown::new() + .optimize(global_limit, ConfigOptions::default_singleton())?; let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; assert_eq!(get_plan_string(&after_optimize), expected); @@ -479,7 +479,7 @@ fn merges_local_limit_with_global_limit() -> Result<()> { assert_eq!(initial, expected_initial); let after_optimize = - LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; + LimitPushdown::new().optimize(local_limit, ConfigOptions::default_singleton())?; let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; assert_eq!(get_plan_string(&after_optimize), expected); diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index b0b5f731063f..6c778c88934c 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -121,6 +121,7 @@ fn test_update_matching_exprs() -> Result<()> { )), ], DataType::Int32, + Arc::clone(ConfigOptions::default_singleton_arc()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d", 2))), @@ -186,6 +187,7 @@ fn test_update_matching_exprs() -> Result<()> { )), ], DataType::Int32, + Arc::clone(ConfigOptions::default_singleton_arc()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d", 3))), @@ 
-254,6 +256,7 @@ fn test_update_projected_exprs() -> Result<()> { )), ], DataType::Int32, + Arc::clone(ConfigOptions::default_singleton_arc()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d", 2))), @@ -319,6 +322,7 @@ fn test_update_projected_exprs() -> Result<()> { )), ], DataType::Int32, + Arc::clone(ConfigOptions::default_singleton_arc()), )), Arc::new(CaseExpr::try_new( Some(Arc::new(Column::new("d_new", 3))), @@ -421,8 +425,8 @@ fn test_csv_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = ["DataSourceExec: file_groups={1 group: [[x]]}, projection=[b, d], file_type=csv, has_header=false"]; @@ -449,8 +453,8 @@ fn test_memory_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = ["DataSourceExec: partitions=0, partition_sizes=[]"]; assert_eq!(get_plan_string(&after_optimize), expected); @@ -535,8 +539,8 @@ fn test_streaming_table_after_projection() -> Result<()> { Arc::new(streaming_table) as _, )?) 
as _; - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let result = after_optimize .as_any() @@ -624,8 +628,8 @@ fn test_projection_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(top_projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(top_projection, ConfigOptions::default_singleton())?; let expected = [ "ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b]", @@ -677,8 +681,8 @@ fn test_output_req_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected: [&str; 3] = [ "OutputRequirementExec", @@ -754,8 +758,8 @@ fn test_coalesce_partitions_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "CoalescePartitionsExec", @@ -801,8 +805,8 @@ fn test_filter_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "FilterExec: b@1 - a_new@0 > d@2 - a_new@0", @@ -887,8 +891,8 @@ fn test_join_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, 
&ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", @@ -1006,8 +1010,8 @@ fn test_join_after_required_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]", @@ -1074,8 +1078,8 @@ fn test_nested_loop_join_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1, projection=[c@2]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", @@ -1155,8 +1159,8 @@ fn test_hash_join_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; // HashJoinExec only returns result after projection. Because there are some alias columns in the projection, the ProjectionExec is not removed. 
let expected = ["ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@3 as c_from_right]", " HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"]; @@ -1172,8 +1176,8 @@ fn test_hash_join_after_projection() -> Result<()> { join.clone(), )?); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; // Comparing to the previous result, this projection don't have alias columns either change the order of output fields. So the ProjectionExec is removed. let expected = ["HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false"]; @@ -1212,8 +1216,8 @@ fn test_repartition_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1", @@ -1279,8 +1283,8 @@ fn test_sort_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - 
ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]", @@ -1329,8 +1333,8 @@ fn test_sort_preserving_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]", @@ -1366,8 +1370,8 @@ fn test_union_after_projection() -> Result<()> { ]; assert_eq!(initial, expected_initial); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "UnionExec", @@ -1425,8 +1429,8 @@ fn test_partition_col_projection_pushdown() -> Result<()> { source, )?); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "ProjectionExec: expr=[string_col@1 as string_col, partition_col@2 as partition_col, int_col@0 as int_col]", @@ -1465,8 +1469,8 @@ fn test_partition_col_projection_pushdown_expr() -> Result<()> { source, )?); - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let after_optimize = ProjectionPushdown::new() + .optimize(projection, ConfigOptions::default_singleton())?; let expected = [ "ProjectionExec: expr=[string_col@1 as string_col, CAST(partition_col@2 AS Utf8View) as partition_col, int_col@0 as int_col]", diff --git a/datafusion/datasource/src/file_scan_config.rs 
b/datafusion/datasource/src/file_scan_config.rs index 91b5f0157739..2c53b6bad5d2 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -86,7 +86,7 @@ use crate::{ /// # // Note: crate mock ParquetSource, as ParquetSource is not in the datasource crate /// # struct ParquetSource { /// # projected_statistics: Option -/// # }; +/// # } /// # impl FileSource for ParquetSource { /// # fn create_file_opener(&self, _: Arc, _: &FileScanConfig, _: usize) -> Arc { unimplemented!() } /// # fn as_any(&self) -> &dyn Any { self } @@ -1087,6 +1087,7 @@ mod tests { compute::SortOptions, }; + use datafusion_common::config::ConfigOptions; use datafusion_common::stats::Precision; use datafusion_common::{assert_batches_eq, DFSchema}; use datafusion_expr::{execution_props::ExecutionProps, SortExpr}; @@ -1097,6 +1098,7 @@ mod tests { e: &SortExpr, input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { let SortExpr { expr, @@ -1104,7 +1106,12 @@ mod tests { nulls_first, } = e; Ok(PhysicalSortExpr { - expr: create_physical_expr(expr, input_dfschema, execution_props)?, + expr: create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?, options: SortOptions { descending: !asc, nulls_first: *nulls_first, @@ -1635,6 +1642,7 @@ mod tests { &expr, &DFSchema::try_from(table_schema.as_ref().clone())?, &ExecutionProps::default(), + ConfigOptions::default_singleton_arc(), ) }) .collect::>>()?, diff --git a/datafusion/expr-common/src/accumulator.rs b/datafusion/expr-common/src/accumulator.rs index dc1e023d4c3c..d23dade480a2 100644 --- a/datafusion/expr-common/src/accumulator.rs +++ b/datafusion/expr-common/src/accumulator.rs @@ -26,7 +26,7 @@ use std::fmt::Debug; /// `Accumulator`s are stateful objects that implement a single group. They /// aggregate values from multiple rows together into a final output aggregate. 
/// -/// [`GroupsAccumulator]` is an additional more performant (but also complex) API +/// [`GroupsAccumulator`] is an additional more performant (but also complex) API /// that manages state for multiple groups at once. /// /// An accumulator knows how to: @@ -42,7 +42,7 @@ use std::fmt::Debug; /// [`state`] and combine the state from multiple accumulators /// via [`merge_batch`], as part of efficient multi-phase grouping. /// -/// [`GroupsAccumulator`]: crate::GroupsAccumulator +/// [`GroupsAccumulator`]: crate::groups_accumulator::GroupsAccumulator /// [`update_batch`]: Self::update_batch /// [`retract_batch`]: Self::retract_batch /// [`state`]: Self::state diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index 467ce8bf53e2..46716c113e45 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -17,10 +17,11 @@ //! Structs and traits to provide the information needed for expression simplification. +use crate::{execution_props::ExecutionProps, Expr, ExprSchemable}; use arrow::datatypes::DataType; +use datafusion_common::config::ConfigOptions; use datafusion_common::{DFSchemaRef, DataFusionError, Result}; - -use crate::{execution_props::ExecutionProps, Expr, ExprSchemable}; +use std::sync::Arc; /// Provides the information necessary to apply algebraic simplification to an /// [Expr]. See [SimplifyContext] for one concrete implementation. 
@@ -38,6 +39,9 @@ pub trait SimplifyInfo { /// Returns details needed for partial expression evaluation fn execution_props(&self) -> &ExecutionProps; + /// Returns the config options + fn config_options(&self) -> &Arc<ConfigOptions>; + /// Returns data type of this expr needed for determining optimized int type of a value fn get_data_type(&self, expr: &Expr) -> Result<DataType>; } @@ -53,14 +57,19 @@ pub trait SimplifyInfo { pub struct SimplifyContext<'a> { schema: Option<DFSchemaRef>, props: &'a ExecutionProps, + config_options: &'a Arc<ConfigOptions>, } impl<'a> SimplifyContext<'a> { /// Create a new SimplifyContext - pub fn new(props: &'a ExecutionProps) -> Self { + pub fn new( + props: &'a ExecutionProps, + config_options: &'a Arc<ConfigOptions>, + ) -> Self { Self { schema: None, props, + config_options, } } @@ -106,6 +115,10 @@ impl SimplifyInfo for SimplifyContext<'_> { fn execution_props(&self) -> &ExecutionProps { self.props } + + fn config_options(&self) -> &Arc<ConfigOptions> { + self.config_options + } } /// Was the expression simplified? diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 86bc5852b830..16efed0cd9bf 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -24,6 +24,7 @@ use crate::{ ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature, }; use arrow::datatypes::DataType; +use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue}; use datafusion_expr_common::interval_arithmetic::Interval; use std::any::Any; @@ -338,6 +339,8 @@ pub struct ScalarFunctionArgs<'a> { /// The return type of the scalar function returned (from `return_type` or `return_type_from_args`) /// when creating the physical expression from the logical expression pub return_type: &'a DataType, + /// The config options, which can be used to look up configuration properties + pub config_options: &'a ConfigOptions, } /// Information about arguments passed to the function diff --git a/datafusion/ffi/src/plan_properties.rs
b/datafusion/ffi/src/plan_properties.rs index 3592c16b8fab..448dbaf9e5f7 100644 --- a/datafusion/ffi/src/plan_properties.rs +++ b/datafusion/ffi/src/plan_properties.rs @@ -26,6 +26,7 @@ use abi_stable::{ }; use arrow::datatypes::SchemaRef; use datafusion::{ + config::ConfigOptions, error::{DataFusionError, Result}, physical_expr::EquivalenceProperties, physical_plan::{ @@ -179,9 +180,10 @@ impl TryFrom for PlanProperties { let ffi_schema = unsafe { (ffi_props.schema)(&ffi_props) }; let schema = (&ffi_schema.0).try_into()?; - // TODO Extend FFI to get the registry and codex + // TODO Extend FFI to get the registry, config_options and codex let default_ctx = SessionContext::new(); let codex = DefaultPhysicalExtensionCodec {}; + let config_options = ConfigOptions::new(); let ffi_orderings = unsafe { (ffi_props.output_ordering)(&ffi_props) }; @@ -191,6 +193,7 @@ impl TryFrom for PlanProperties { let orderings = Some(parse_physical_sort_exprs( &proto_output_ordering.physical_sort_expr_nodes, &default_ctx, + &config_options, &schema, &codex, )?); @@ -203,6 +206,7 @@ impl TryFrom for PlanProperties { let partitioning = parse_protobuf_partitioning( Some(&proto_output_partitioning), &default_ctx, + &config_options, &schema, &codex, )? diff --git a/datafusion/ffi/src/udf.rs b/datafusion/ffi/src/udf.rs index bbc9cf936cee..8f9040218a35 100644 --- a/datafusion/ffi/src/udf.rs +++ b/datafusion/ffi/src/udf.rs @@ -28,7 +28,7 @@ use arrow::{ ffi::{from_ffi, to_ffi, FFI_ArrowSchema}, }; use datafusion::{ - error::DataFusionError, + config::ConfigOptions, error::DataFusionError, logical_expr::type_coercion::functions::data_types_with_scalar_udf, }; use datafusion::{ @@ -78,7 +78,7 @@ pub struct FFI_ScalarUDF { /// See [`ScalarUDFImpl`] for details on short_circuits pub short_circuits: bool, - /// Performs type coersion. To simply this interface, all UDFs are treated as having + /// Performs type coercion. 
To simply this interface, all UDFs are treated as having /// user defined signatures, which will in turn call coerce_types to be called. This /// call should be transparent to most users as the internal function performs the /// appropriate calls on the underlying [`ScalarUDF`] @@ -157,10 +157,14 @@ unsafe extern "C" fn invoke_with_args_fn_wrapper( let args = rresult_return!(args); let return_type = rresult_return!(DataType::try_from(&return_type.0)); + // TODO Extend FFI to get the config_options + let config_options = ConfigOptions::new(); + let args = ScalarFunctionArgs { args, number_rows, return_type: &return_type, + config_options: &config_options, }; let result = rresult_return!(udf @@ -286,6 +290,7 @@ impl ScalarUDFImpl for ForeignScalarUDF { args, number_rows, return_type, + config_options: _config_options, } = invoke_args; let args = args diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index 2774b24b902a..56adfd60c37a 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -21,16 +21,16 @@ use arrow::array::{Int32Array, ListArray, StringArray}; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use rand::prelude::ThreadRng; -use rand::Rng; -use std::collections::HashSet; -use std::sync::Arc; - +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::planner::ExprPlanner; use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs}; use datafusion_functions_nested::map::map_udf; use datafusion_functions_nested::planner::NestedFunctionPlanner; +use rand::prelude::ThreadRng; +use rand::Rng; +use std::collections::HashSet; +use std::sync::Arc; fn keys(rng: &mut ThreadRng) -> Vec { let mut keys = HashSet::with_capacity(1000); @@ -97,6 +97,7 @@ fn criterion_benchmark(c: &mut Criterion) { let 
return_type = &map_udf() .return_type(&[DataType::Utf8, DataType::Int32]) .expect("should get return type"); + let config_options = ConfigOptions::default_singleton(); b.iter(|| { black_box( @@ -105,6 +106,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![keys.clone(), values.clone()], number_rows: 1, return_type, + config_options, }) .expect("map should work on valid values"), ); diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index bbcfed021064..6479973f8834 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -19,6 +19,7 @@ extern crate criterion; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; @@ -34,6 +35,8 @@ fn criterion_benchmark(c: &mut Criterion) { for str_len in [8, 32, 128, 4096] { // StringArray ASCII only let args_string_ascii = gen_string_array(n_rows, str_len, 0.1, 0.0, false); + let config_options = ConfigOptions::default_singleton_arc(); + c.bench_function( &format!("character_length_StringArray_ascii_str_len_{}", str_len), |b| { @@ -42,6 +45,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_ascii.clone(), number_rows: n_rows, return_type: &return_type, + config_options, })) }) }, @@ -57,6 +61,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_utf8.clone(), number_rows: n_rows, return_type: &return_type, + config_options, })) }) }, @@ -72,6 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_view_ascii.clone(), number_rows: n_rows, return_type: &return_type, + config_options, })) }) }, @@ -87,6 +93,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_view_utf8.clone(), number_rows: n_rows, return_type: &return_type, + config_options, })) }) }, diff --git 
a/datafusion/functions/benches/chr.rs b/datafusion/functions/benches/chr.rs index 4750fb466653..1c58a610ac46 100644 --- a/datafusion/functions/benches/chr.rs +++ b/datafusion/functions/benches/chr.rs @@ -24,6 +24,7 @@ use datafusion_functions::string::chr; use rand::Rng; use arrow::datatypes::DataType; +use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { @@ -44,6 +45,8 @@ fn criterion_benchmark(c: &mut Criterion) { }; let input = Arc::new(input); let args = vec![ColumnarValue::Array(input)]; + let config_options = ConfigOptions::default_singleton_arc(); + c.bench_function("chr", |b| { b.iter(|| { black_box( @@ -52,6 +55,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs index 45ca076e754f..2c260c79fc34 100644 --- a/datafusion/functions/benches/concat.rs +++ b/datafusion/functions/benches/concat.rs @@ -19,6 +19,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::DataType; use arrow::util::bench_util::create_string_array_with_len; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string::concat; @@ -35,6 +36,8 @@ fn create_args(size: usize, str_len: usize) -> Vec { } fn criterion_benchmark(c: &mut Criterion) { + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let args = create_args(size, 32); let mut group = c.benchmark_group("concat function"); @@ -47,6 +50,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/cot.rs 
b/datafusion/functions/benches/cot.rs index b2a9ca0b9f47..e93e1d43750b 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -17,19 +17,22 @@ extern crate criterion; +use arrow::datatypes::DataType; use arrow::{ datatypes::{Float32Type, Float64Type}, util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::cot; -use arrow::datatypes::DataType; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let cot_fn = cot(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; @@ -41,6 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f32_args.clone(), number_rows: size, return_type: &DataType::Float32, + config_options, }) .unwrap(), ) @@ -56,6 +60,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f64_args.clone(), number_rows: size, return_type: &DataType::Float64, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index 7ea5fdcb2be2..f8c6c875fa71 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -21,12 +21,12 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, TimestampSecondArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; -use rand::rngs::ThreadRng; -use rand::Rng; - use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::date_bin; +use rand::rngs::ThreadRng; +use rand::Rng; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { let mut seconds = vec![]; @@ 
-48,6 +48,7 @@ fn criterion_benchmark(c: &mut Criterion) { let return_type = udf .return_type(&[interval.data_type(), timestamps.data_type()]) .unwrap(); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -55,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![interval.clone(), timestamps.clone()], number_rows: batch_len, return_type: &return_type, + config_options, }) .expect("date_bin should work on valid values"), ) diff --git a/datafusion/functions/benches/date_trunc.rs b/datafusion/functions/benches/date_trunc.rs index e7e96fb7a9fa..da4d493054aa 100644 --- a/datafusion/functions/benches/date_trunc.rs +++ b/datafusion/functions/benches/date_trunc.rs @@ -21,12 +21,12 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, TimestampSecondArray}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; -use rand::rngs::ThreadRng; -use rand::Rng; - use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::date_trunc; +use rand::rngs::ThreadRng; +use rand::Rng; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { let mut seconds = vec![]; @@ -50,12 +50,15 @@ fn criterion_benchmark(c: &mut Criterion) { let return_type = &udf .return_type(&args.iter().map(|arg| arg.data_type()).collect::>()) .unwrap(); + let config_options = ConfigOptions::default_singleton_arc(); + b.iter(|| { black_box( udf.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: batch_len, return_type, + config_options, }) .expect("date_trunc should work on valid values"), ) diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index cf8f8d2fd62c..427e64e8fb37 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -20,12 +20,15 @@ extern crate criterion; use arrow::datatypes::DataType; use 
arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::encoding; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let decode = encoding::decode(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { @@ -35,6 +38,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![ColumnarValue::Array(str_array.clone()), method.clone()], number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(); @@ -46,6 +50,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) @@ -59,6 +64,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![ColumnarValue::Array(str_array.clone()), method.clone()], number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(); @@ -70,6 +76,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/find_in_set.rs b/datafusion/functions/benches/find_in_set.rs index 9307525482c2..51bf905c3212 100644 --- a/datafusion/functions/benches/find_in_set.rs +++ b/datafusion/functions/benches/find_in_set.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use rand::distributions::Alphanumeric; @@ -144,6 +145,7 @@ fn 
gen_args_scalar( fn criterion_benchmark(c: &mut Criterion) { // All benches are single batch run with 8192 rows let find_in_set = datafusion_functions::unicode::find_in_set(); + let config_options = ConfigOptions::default_singleton(); let n_rows = 8192; for str_len in [8, 32, 1024] { @@ -159,6 +161,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }); @@ -170,6 +173,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }); @@ -185,6 +189,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }); @@ -196,6 +201,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }); diff --git a/datafusion/functions/benches/gcd.rs b/datafusion/functions/benches/gcd.rs index f8c855c82ad4..3d56e2a8a733 100644 --- a/datafusion/functions/benches/gcd.rs +++ b/datafusion/functions/benches/gcd.rs @@ -22,6 +22,7 @@ use arrow::{ datatypes::DataType, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::gcd; @@ -41,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) { let array_a = ColumnarValue::Array(generate_i64_array(n_rows)); let array_b = ColumnarValue::Array(generate_i64_array(n_rows)); let udf = gcd(); + let config_options = ConfigOptions::default_singleton_arc(); c.bench_function("gcd both array", |b| { b.iter(|| { @@ -49,6 +51,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![array_a.clone(), array_b.clone()], number_rows: 0, return_type: &DataType::Int64, + config_options, }) .expect("date_bin should work on valid 
values"), ) @@ -65,6 +68,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![array_a.clone(), scalar_b.clone()], number_rows: 0, return_type: &DataType::Int64, + config_options, }) .expect("date_bin should work on valid values"), ) @@ -81,6 +85,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![scalar_a.clone(), scalar_b.clone()], number_rows: 0, return_type: &DataType::Int64, + config_options, }) .expect("date_bin should work on valid values"), ) diff --git a/datafusion/functions/benches/initcap.rs b/datafusion/functions/benches/initcap.rs index 97c76831b33c..295a4e08bddc 100644 --- a/datafusion/functions/benches/initcap.rs +++ b/datafusion/functions/benches/initcap.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode; use std::sync::Arc; @@ -47,6 +48,8 @@ fn create_args( fn criterion_benchmark(c: &mut Criterion) { let initcap = unicode::initcap(); + let config_options = ConfigOptions::default_singleton(); + for size in [1024, 4096] { let args = create_args::(size, 8, true); c.bench_function( @@ -57,6 +60,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -71,6 +75,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -83,6 +88,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, })) }) }); diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 42004cc24f69..c9309aa382e0 100644 --- a/datafusion/functions/benches/isnan.rs +++ 
b/datafusion/functions/benches/isnan.rs @@ -23,12 +23,15 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::isnan; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let isnan = isnan(); + let config_options = ConfigOptions::default_singleton(); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; @@ -40,6 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f32_args.clone(), number_rows: size, return_type: &DataType::Boolean, + config_options, }) .unwrap(), ) @@ -55,6 +59,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f64_args.clone(), number_rows: size, return_type: &DataType::Boolean, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 9e5f6a84804b..d44718feb109 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -23,12 +23,15 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::iszero; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let iszero = iszero(); + let config_options = ConfigOptions::default_singleton(); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = f32_array.len(); @@ -41,6 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f32_args.clone(), number_rows: batch_len, return_type: &DataType::Boolean, + config_options, }) .unwrap(), ) @@ -57,6 +61,7 @@ fn criterion_benchmark(c: &mut 
Criterion) { args: f64_args.clone(), number_rows: batch_len, return_type: &DataType::Boolean, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs index 534e5739225d..94330ff129c4 100644 --- a/datafusion/functions/benches/lower.rs +++ b/datafusion/functions/benches/lower.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -122,6 +123,8 @@ fn create_args5( fn criterion_benchmark(c: &mut Criterion) { let lower = string::lower(); + let config_options = ConfigOptions::default_singleton(); + for size in [1024, 4096, 8192] { let args = create_args1(size, 32); c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| { @@ -131,6 +134,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }) }); @@ -145,6 +149,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -160,6 +165,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -185,6 +191,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }), ); @@ -199,6 +206,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }), ); @@ -213,6 +221,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }), ); diff --git 
a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index 457fb499f5a1..74533b0b095b 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -23,6 +23,7 @@ use criterion::{ black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup, Criterion, SamplingMode, }; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF}; use datafusion_functions::string; @@ -136,6 +137,8 @@ fn run_with_string_type( string_type: StringArrayType, ) { let args = create_args(size, characters, trimmed, remaining_len, string_type); + let config_options = ConfigOptions::default_singleton(); + group.bench_function( format!( "{string_type} [size={size}, len_before={len}, len_after={remaining_len}]", @@ -147,6 +150,7 @@ fn run_with_string_type( args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }) }, diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index 8dd7a7a59773..49603a88d00b 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -22,12 +22,12 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, Int32Array}; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use rand::rngs::ThreadRng; -use rand::Rng; - +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::make_date; +use rand::rngs::ThreadRng; +use rand::Rng; fn years(rng: &mut ThreadRng) -> Int32Array { let mut years = vec![]; @@ -63,6 +63,7 @@ fn criterion_benchmark(c: &mut Criterion) { let years = ColumnarValue::Array(years_array); let months = ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef); let days = 
ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); + let config_options = ConfigOptions::default_singleton(); b.iter(|| { black_box( @@ -71,6 +72,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![years.clone(), months.clone(), days.clone()], number_rows: batch_len, return_type: &DataType::Date32, + config_options, }) .expect("make_date should work on valid values"), ) @@ -84,6 +86,7 @@ fn criterion_benchmark(c: &mut Criterion) { let batch_len = months_arr.len(); let months = ColumnarValue::Array(months_arr); let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); + let config_options = ConfigOptions::default_singleton(); b.iter(|| { black_box( @@ -92,6 +95,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![year.clone(), months.clone(), days.clone()], number_rows: batch_len, return_type: &DataType::Date32, + config_options, }) .expect("make_date should work on valid values"), ) @@ -105,6 +109,7 @@ fn criterion_benchmark(c: &mut Criterion) { let day_arr = Arc::new(days(&mut rng)); let batch_len = day_arr.len(); let days = ColumnarValue::Array(day_arr); + let config_options = ConfigOptions::default_singleton(); b.iter(|| { black_box( @@ -113,6 +118,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![year.clone(), month.clone(), days.clone()], number_rows: batch_len, return_type: &DataType::Date32, + config_options, }) .expect("make_date should work on valid values"), ) @@ -123,6 +129,7 @@ fn criterion_benchmark(c: &mut Criterion) { let year = ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))); let month = ColumnarValue::Scalar(ScalarValue::Int32(Some(11))); let day = ColumnarValue::Scalar(ScalarValue::Int32(Some(26))); + let config_options = ConfigOptions::default_singleton(); b.iter(|| { black_box( @@ -131,6 +138,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![year.clone(), month.clone(), day.clone()], number_rows: 1, return_type: &DataType::Date32, + config_options, }) .expect("make_date should work 
on valid values"), ) diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index 9096c976bf31..cbeb79293ca3 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::DataType; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::core::nullif; @@ -27,6 +28,8 @@ use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let nullif = nullif(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); let args = vec![ @@ -41,6 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index f78a53fbee19..fbc31a8ba50b 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -21,6 +21,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode::{lpad, rpad}; use rand::distributions::{Distribution, Uniform}; @@ -96,6 +97,8 @@ fn create_args( } fn criterion_benchmark(c: &mut Criterion) { + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 2048] { let mut group = c.benchmark_group("lpad function"); @@ -108,6 +111,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), 
number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) @@ -123,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::LargeUtf8, + config_options, }) .unwrap(), ) @@ -138,6 +143,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) @@ -157,6 +163,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) @@ -172,6 +179,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::LargeUtf8, + config_options, }) .unwrap(), ) @@ -188,6 +196,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs index 78ebf23e02e0..e96e0711b459 100644 --- a/datafusion/functions/benches/random.rs +++ b/datafusion/functions/benches/random.rs @@ -19,11 +19,13 @@ extern crate criterion; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl}; use datafusion_functions::math::random::RandomFunc; fn criterion_benchmark(c: &mut Criterion) { let random_func = RandomFunc::new(); + let config_options = ConfigOptions::default_singleton_arc(); // Benchmark to evaluate 1M rows in batch size 8192 let iterations = 1_000_000 / 8192; // Calculate how many iterations are needed to reach approximately 1M rows @@ -36,6 +38,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![], number_rows: 8192, return_type: &DataType::Float64, + config_options, }) .unwrap(), ); @@ -54,6 +57,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: 
vec![], number_rows: 128, return_type: &DataType::Float64, + config_options, }) .unwrap(), ); diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs index 5cc6a177d9d9..93987cd0fa28 100644 --- a/datafusion/functions/benches/repeat.rs +++ b/datafusion/functions/benches/repeat.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -58,6 +59,8 @@ fn create_args( fn criterion_benchmark(c: &mut Criterion) { let repeat = string::repeat(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096] { // REPEAT 3 TIMES let repeat_times = 3; @@ -79,6 +82,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -97,6 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -115,6 +120,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -142,6 +148,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -160,6 +167,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -178,6 +186,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -205,6 +214,7 @@ 
fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: repeat_times as usize, return_type: &DataType::Utf8, + config_options, })) }) }, diff --git a/datafusion/functions/benches/reverse.rs b/datafusion/functions/benches/reverse.rs index d61f8fb80517..aac6792fb204 100644 --- a/datafusion/functions/benches/reverse.rs +++ b/datafusion/functions/benches/reverse.rs @@ -20,6 +20,7 @@ mod helper; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; @@ -31,6 +32,9 @@ fn criterion_benchmark(c: &mut Criterion) { const NULL_DENSITY: f32 = 0.1; const UTF8_DENSITY_OF_ALL_ASCII: f32 = 0.0; const NORMAL_UTF8_DENSITY: f32 = 0.8; + + let config_options = ConfigOptions::default_singleton_arc(); + for str_len in [8, 32, 128, 4096] { // StringArray ASCII only let args_string_ascii = gen_string_array( @@ -48,6 +52,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_ascii.clone(), number_rows: N_ROWS, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -67,6 +72,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_utf8.clone(), number_rows: N_ROWS, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -88,6 +94,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_view_ascii.clone(), number_rows: N_ROWS, return_type: &DataType::Utf8, + config_options, })) }) }, @@ -107,6 +114,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_view_utf8.clone(), number_rows: N_ROWS, return_type: &DataType::Utf8, + config_options, })) }) }, diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index 01939fad5f34..d83bef7803d2 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -23,12 +23,15 @@ use arrow::{ util::bench_util::create_primitive_array, }; use 
criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::signum; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let signum = signum(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = f32_array.len(); @@ -41,6 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f32_args.clone(), number_rows: batch_len, return_type: &DataType::Float32, + config_options, }) .unwrap(), ) @@ -58,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f64_args.clone(), number_rows: batch_len, return_type: &DataType::Float64, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index df57c229e0ad..13ba2e24dec7 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::array::{StringArray, StringViewArray}; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use rand::distributions::Alphanumeric; use rand::prelude::StdRng; @@ -106,7 +107,7 @@ fn random_substring(chars: Chars) -> String { fn criterion_benchmark(c: &mut Criterion) { // All benches are single batch run with 8192 rows let strpos = datafusion_functions::unicode::strpos(); - + let config_options = ConfigOptions::default_singleton_arc(); let n_rows = 8192; for str_len in [8, 32, 128, 4096] { // StringArray ASCII only @@ -119,6 +120,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_ascii.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }, @@ -134,6 +136,7 @@ fn 
criterion_benchmark(c: &mut Criterion) { args: args_string_utf8.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }, @@ -149,6 +152,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_view_ascii.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }, @@ -164,6 +168,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_string_view_utf8.clone(), number_rows: n_rows, return_type: &DataType::Int32, + config_options, })) }) }, diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 80ab70ef71b0..fb10d9ea0398 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -23,6 +23,7 @@ use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode; use std::sync::Arc; @@ -98,6 +99,8 @@ fn create_args_with_count( fn criterion_benchmark(c: &mut Criterion) { let substr = unicode::substr(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096] { // string_len = 12, substring_len=6 (see `create_args_without_count`) let len = 12; @@ -114,6 +117,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -128,6 +132,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -142,6 +147,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -168,6 +174,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, 
return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -185,6 +192,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -202,6 +210,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -228,6 +237,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -245,6 +255,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, @@ -262,6 +273,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: size, return_type: &DataType::Utf8View, + config_options, })) }) }, diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index b1c1c3c34a95..d19750e5cc11 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -22,13 +22,13 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Int64Array, StringArray}; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; +use datafusion_functions::unicode::substr_index; use rand::distributions::{Alphanumeric, Uniform}; use rand::prelude::Distribution; use rand::Rng; -use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; -use datafusion_functions::unicode::substr_index; - struct Filter { dist: Dist, test: Test, @@ -89,8 +89,9 @@ fn criterion_benchmark(c: &mut Criterion) { let strings = ColumnarValue::Array(Arc::new(strings) as ArrayRef); let delimiters = ColumnarValue::Array(Arc::new(delimiters) as ArrayRef); let counts = ColumnarValue::Array(Arc::new(counts) as 
ArrayRef); - let args = vec![strings, delimiters, counts]; + let config_options = ConfigOptions::default_singleton_arc(); + b.iter(|| { black_box( substr_index() @@ -98,6 +99,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }) .expect("substr_index should work on valid values"), ) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index 6f20a20dc219..f831bef06a7a 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -28,6 +28,7 @@ use rand::rngs::ThreadRng; use rand::seq::SliceRandom; use rand::Rng; +use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_common::ScalarValue::TimestampNanosecond; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; @@ -87,6 +88,7 @@ fn criterion_benchmark(c: &mut Criterion) { let batch_len = data_arr.len(); let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -95,6 +97,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![data.clone(), patterns.clone()], number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }) .expect("to_char should work on valid values"), ) @@ -108,6 +111,7 @@ fn criterion_benchmark(c: &mut Criterion) { let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef); let patterns = ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -116,6 +120,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![data.clone(), patterns.clone()], number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }) .expect("to_char should work on valid values"), ) @@ -135,6 +140,7 @@ fn 
criterion_benchmark(c: &mut Criterion) { let pattern = ColumnarValue::Scalar(ScalarValue::Utf8(Some( "%d-%m-%Y %H:%M:%S".to_string(), ))); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -143,6 +149,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![data.clone(), pattern.clone()], number_rows: 1, return_type: &DataType::Utf8, + config_options, }) .expect("to_char should work on valid values"), ) diff --git a/datafusion/functions/benches/to_hex.rs b/datafusion/functions/benches/to_hex.rs index a45d936c0a52..3b5c18128c51 100644 --- a/datafusion/functions/benches/to_hex.rs +++ b/datafusion/functions/benches/to_hex.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Int32Type, Int64Type}; use arrow::util::bench_util::create_primitive_array; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -30,6 +31,8 @@ fn criterion_benchmark(c: &mut Criterion) { let i32_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = i32_array.len(); let i32_args = vec![ColumnarValue::Array(i32_array)]; + let config_options = ConfigOptions::default_singleton_arc(); + c.bench_function(&format!("to_hex i32 array: {}", size), |b| { b.iter(|| { let args_cloned = i32_args.clone(); @@ -38,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) @@ -54,6 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index aec56697691f..c2084b49b7b5 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ 
b/datafusion/functions/benches/to_timestamp.rs @@ -24,7 +24,7 @@ use arrow::array::{Array, ArrayRef, StringArray}; use arrow::compute::cast; use arrow::datatypes::{DataType, TimeUnit}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; - +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::to_timestamp; @@ -114,6 +114,7 @@ fn criterion_benchmark(c: &mut Criterion) { let arr_data = data(); let batch_len = arr_data.len(); let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -122,6 +123,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![string_array.clone()], number_rows: batch_len, return_type, + config_options, }) .expect("to_timestamp should work on valid values"), ) @@ -132,6 +134,7 @@ fn criterion_benchmark(c: &mut Criterion) { let data = cast(&data(), &DataType::LargeUtf8).unwrap(); let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -140,6 +143,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![string_array.clone()], number_rows: batch_len, return_type, + config_options, }) .expect("to_timestamp should work on valid values"), ) @@ -150,6 +154,7 @@ fn criterion_benchmark(c: &mut Criterion) { let data = cast(&data(), &DataType::Utf8View).unwrap(); let batch_len = data.len(); let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); + let config_options = ConfigOptions::default_singleton_arc(); b.iter(|| { black_box( @@ -158,6 +163,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: vec![string_array.clone()], number_rows: batch_len, return_type, + config_options, }) .expect("to_timestamp should work on valid values"), ) @@ -174,6 +180,8 @@ fn criterion_benchmark(c: &mut Criterion) { 
ColumnarValue::Array(Arc::new(format2) as ArrayRef), ColumnarValue::Array(Arc::new(format3) as ArrayRef), ]; + let config_options = ConfigOptions::default_singleton_arc(); + b.iter(|| { black_box( to_timestamp() @@ -181,6 +189,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: batch_len, return_type, + config_options, }) .expect("to_timestamp should work on valid values"), ) @@ -205,6 +214,8 @@ fn criterion_benchmark(c: &mut Criterion) { Arc::new(cast(&format3, &DataType::LargeUtf8).unwrap()) as ArrayRef ), ]; + let config_options = ConfigOptions::default_singleton_arc(); + b.iter(|| { black_box( to_timestamp() @@ -212,6 +223,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: batch_len, return_type, + config_options, }) .expect("to_timestamp should work on valid values"), ) @@ -237,6 +249,8 @@ fn criterion_benchmark(c: &mut Criterion) { Arc::new(cast(&format3, &DataType::Utf8View).unwrap()) as ArrayRef ), ]; + let config_options = ConfigOptions::default_singleton_arc(); + b.iter(|| { black_box( to_timestamp() @@ -244,6 +258,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args.clone(), number_rows: batch_len, return_type, + config_options, }) .expect("to_timestamp should work on valid values"), ) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 7fc93921d2e7..5b6867bcae7f 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -26,10 +26,13 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::trunc; use arrow::datatypes::DataType; +use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let trunc = trunc(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; 
@@ -41,6 +44,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f32_args.clone(), number_rows: size, return_type: &DataType::Float32, + config_options, }) .unwrap(), ) @@ -56,6 +60,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: f64_args.clone(), number_rows: size, return_type: &DataType::Float64, + config_options, }) .unwrap(), ) diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs index f0bee89c7d37..b2f913309402 100644 --- a/datafusion/functions/benches/upper.rs +++ b/datafusion/functions/benches/upper.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::DataType; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string; use std::sync::Arc; @@ -35,6 +36,8 @@ fn create_args(size: usize, str_len: usize) -> Vec { fn criterion_benchmark(c: &mut Criterion) { let upper = string::upper(); + let config_options = ConfigOptions::default_singleton_arc(); + for size in [1024, 4096, 8192] { let args = create_args(size, 32); c.bench_function("upper_all_values_are_ascii", |b| { @@ -44,6 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) { args: args_cloned, number_rows: size, return_type: &DataType::Utf8, + config_options, })) }) }); diff --git a/datafusion/functions/benches/uuid.rs b/datafusion/functions/benches/uuid.rs index 7b8d156fec21..153d891a691e 100644 --- a/datafusion/functions/benches/uuid.rs +++ b/datafusion/functions/benches/uuid.rs @@ -19,17 +19,21 @@ extern crate criterion; use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; use datafusion_functions::string; fn criterion_benchmark(c: &mut Criterion) { let uuid = string::uuid(); + let config_options = 
ConfigOptions::default_singleton_arc(); + c.bench_function("uuid", |b| { b.iter(|| { black_box(uuid.invoke_with_args(ScalarFunctionArgs { args: vec![], number_rows: 1024, return_type: &DataType::Utf8, + config_options, })) }) }); diff --git a/datafusion/functions/src/core/union_extract.rs b/datafusion/functions/src/core/union_extract.rs index 420eeed42cc3..1f2863fc0cf4 100644 --- a/datafusion/functions/src/core/union_extract.rs +++ b/datafusion/functions/src/core/union_extract.rs @@ -171,6 +171,7 @@ fn find_field<'a>(fields: &'a UnionFields, name: &str) -> Result<(i8, &'a FieldR mod tests { use arrow::datatypes::{DataType, Field, UnionFields, UnionMode}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; @@ -188,6 +189,7 @@ mod tests { Field::new("int", DataType::Int32, false), ], ); + let config_options = ConfigOptions::default_singleton(); let result = fun.invoke_with_args(ScalarFunctionArgs { args: vec![ @@ -200,6 +202,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Utf8, + config_options, })?; assert_scalar(result, ScalarValue::Utf8(None)); @@ -215,6 +218,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Utf8, + config_options, })?; assert_scalar(result, ScalarValue::Utf8(None)); @@ -230,6 +234,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Utf8, + config_options, })?; assert_scalar(result, ScalarValue::new_utf8("42")); diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs index 34038022f2dc..92ed2dba30e9 100644 --- a/datafusion/functions/src/core/version.rs +++ b/datafusion/functions/src/core/version.rs @@ -97,6 +97,7 @@ impl ScalarUDFImpl for VersionFunc { #[cfg(test)] mod test { use super::*; + use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarUDF; #[tokio::test] @@ -107,6 +108,7 @@ mod test { args: vec![], number_rows: 0, return_type: 
&DataType::Utf8, + config_options: ConfigOptions::default_singleton(), }) .unwrap(); diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs index 5ffae46dde48..45e09de13331 100644 --- a/datafusion/functions/src/datetime/date_bin.rs +++ b/datafusion/functions/src/datetime/date_bin.rs @@ -508,6 +508,7 @@ mod tests { use arrow::datatypes::{DataType, TimeUnit}; use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -515,6 +516,7 @@ mod tests { #[test] fn test_date_bin() { + let config_options = ConfigOptions::default_singleton(); let mut args = datafusion_expr::ScalarFunctionArgs { args: vec![ ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some( @@ -528,6 +530,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert!(res.is_ok()); @@ -547,6 +550,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert!(res.is_ok()); @@ -563,6 +567,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert!(res.is_ok()); @@ -582,6 +587,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert!(res.is_ok()); @@ -600,6 +606,7 @@ mod tests { )))], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -616,6 +623,7 @@ mod tests { ], number_rows: 1, return_type: 
&DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -638,6 +646,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); @@ -657,6 +666,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -673,6 +683,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -689,6 +700,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -710,6 +722,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -730,6 +743,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert!(res.is_ok()); @@ -753,6 +767,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -776,6 +791,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options, }; let res = DateBinFunc::new().invoke_with_args(args); assert_eq!( @@ -907,6 +923,7 @@ mod tests { TimeUnit::Nanosecond, tz_opt.clone(), ), + config_options: ConfigOptions::default_singleton(), }; let result = DateBinFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { diff --git 
a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs index ed3eb228bf03..9388164f66c0 100644 --- a/datafusion/functions/src/datetime/date_trunc.rs +++ b/datafusion/functions/src/datetime/date_trunc.rs @@ -488,6 +488,7 @@ mod tests { use arrow::array::{Array, TimestampNanosecondArray}; use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -733,6 +734,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), + config_options: ConfigOptions::default_singleton(), }; let result = DateTruncFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { @@ -895,6 +897,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()), + config_options: ConfigOptions::default_singleton(), }; let result = DateTruncFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs index ed8181452dbd..28191e3d0bc5 100644 --- a/datafusion/functions/src/datetime/from_unixtime.rs +++ b/datafusion/functions/src/datetime/from_unixtime.rs @@ -163,6 +163,7 @@ mod test { use crate::datetime::from_unixtime::FromUnixtimeFunc; use arrow::datatypes::DataType; use arrow::datatypes::TimeUnit::Second; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_common::ScalarValue::Int64; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; @@ -174,6 +175,7 @@ mod test { args: vec![ColumnarValue::Scalar(Int64(Some(1729900800)))], number_rows: 1, return_type: &DataType::Timestamp(Second, None), + config_options: 
ConfigOptions::default_singleton(), }; let result = FromUnixtimeFunc::new().invoke_with_args(args).unwrap(); @@ -199,6 +201,7 @@ mod test { Second, Some(Arc::from("America/New_York")), ), + config_options: ConfigOptions::default_singleton(), }; let result = FromUnixtimeFunc::new().invoke_with_args(args).unwrap(); diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs index 929fa601f107..8b784750a390 100644 --- a/datafusion/functions/src/datetime/make_date.rs +++ b/datafusion/functions/src/datetime/make_date.rs @@ -224,12 +224,14 @@ mod tests { use crate::datetime::make_date::MakeDateFunc; use arrow::array::{Array, Date32Array, Int32Array, Int64Array, UInt32Array}; use arrow::datatypes::DataType; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; #[test] fn test_make_date() { + let config_options = ConfigOptions::default_singleton(); let args = datafusion_expr::ScalarFunctionArgs { args: vec![ ColumnarValue::Scalar(ScalarValue::Int32(Some(2024))), @@ -238,6 +240,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new() .invoke_with_args(args) @@ -257,6 +260,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new() .invoke_with_args(args) @@ -276,6 +280,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new() .invoke_with_args(args) @@ -299,6 +304,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new() .invoke_with_args(args) @@ -325,6 +331,7 @@ mod tests { args: vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new().invoke_with_args(args); assert_eq!( @@ 
-341,6 +348,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new().invoke_with_args(args); assert_eq!( @@ -357,6 +365,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new().invoke_with_args(args); assert_eq!( @@ -373,6 +382,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options, }; let res = MakeDateFunc::new().invoke_with_args(args); assert_eq!( diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs index 8b2e5ad87471..3c7a5debc27d 100644 --- a/datafusion/functions/src/datetime/to_char.rs +++ b/datafusion/functions/src/datetime/to_char.rs @@ -305,6 +305,7 @@ mod tests { }; use arrow::datatypes::DataType; use chrono::{NaiveDateTime, Timelike}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; @@ -384,11 +385,13 @@ mod tests { ), ]; + let config_options = ConfigOptions::default_singleton(); for (value, format, expected) in scalar_data { let args = datafusion_expr::ScalarFunctionArgs { args: vec![ColumnarValue::Scalar(value), ColumnarValue::Scalar(format)], number_rows: 1, return_type: &DataType::Utf8, + config_options, }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -472,6 +475,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -603,6 +607,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -625,6 +630,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Utf8, + config_options, }; let result = ToCharFunc::new() .invoke_with_args(args) @@ -647,6 +653,7 @@ mod tests { args: vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))], 
number_rows: 1, return_type: &DataType::Utf8, + config_options, }; let result = ToCharFunc::new().invoke_with_args(args); assert_eq!( @@ -662,6 +669,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Utf8, + config_options, }; let result = ToCharFunc::new().invoke_with_args(args); assert_eq!( diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs index 91740b2c31c1..fd1250ca45af 100644 --- a/datafusion/functions/src/datetime/to_date.rs +++ b/datafusion/functions/src/datetime/to_date.rs @@ -165,6 +165,7 @@ mod tests { use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray}; use arrow::datatypes::DataType; use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type}; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; use std::sync::Arc; @@ -212,6 +213,7 @@ mod tests { args: vec![ColumnarValue::Scalar(sv)], number_rows: 1, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); @@ -238,6 +240,7 @@ mod tests { args: vec![ColumnarValue::Array(Arc::new(date_array))], number_rows: batch_len, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); @@ -335,6 +338,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); @@ -365,6 +369,7 @@ mod tests { ], number_rows: batch_len, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); @@ -406,6 +411,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = 
ToDateFunc::new().invoke_with_args(args); @@ -435,6 +441,7 @@ mod tests { args: vec![ColumnarValue::Scalar(formatted_date_scalar)], number_rows: 1, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); @@ -457,6 +464,7 @@ mod tests { args: vec![ColumnarValue::Scalar(date_scalar)], number_rows: 1, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); @@ -482,6 +490,7 @@ mod tests { args: vec![ColumnarValue::Scalar(date_scalar)], number_rows: 1, return_type: &DataType::Date32, + config_options: ConfigOptions::default_singleton(), }; let to_date_result = ToDateFunc::new().invoke_with_args(args); diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs index 8dbef90cdc3f..d972321c8e96 100644 --- a/datafusion/functions/src/datetime/to_local_time.rs +++ b/datafusion/functions/src/datetime/to_local_time.rs @@ -411,6 +411,7 @@ mod tests { use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos; use arrow::datatypes::{DataType, TimeUnit}; use chrono::NaiveDateTime; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; @@ -543,6 +544,7 @@ mod tests { args: vec![ColumnarValue::Scalar(input)], number_rows: 1, return_type: &expected.data_type(), + config_options: ConfigOptions::default_singleton(), }) .unwrap(); match res { @@ -606,6 +608,7 @@ mod tests { args: vec![ColumnarValue::Array(Arc::new(input))], number_rows: batch_size, return_type: &DataType::Timestamp(TimeUnit::Nanosecond, None), + config_options: ConfigOptions::default_singleton(), }; let result = ToLocalTimeFunc::new().invoke_with_args(args).unwrap(); if let ColumnarValue::Array(result) = result { diff --git 
a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index f1c61fe2b964..3993c643638f 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -618,6 +618,7 @@ mod tests { use arrow::array::{ArrayRef, Int64Array, StringBuilder}; use arrow::datatypes::TimeUnit; use chrono::Utc; + use datafusion_common::config::ConfigOptions; use datafusion_common::{assert_contains, DataFusionError, ScalarValue}; use datafusion_expr::ScalarFunctionImplementation; @@ -994,6 +995,7 @@ mod tests { args: vec![array.clone()], number_rows: 4, return_type: &rt, + config_options: ConfigOptions::default_singleton(), }; let res = udf .invoke_with_args(args) @@ -1041,6 +1043,7 @@ mod tests { args: vec![array.clone()], number_rows: 5, return_type: &rt, + config_options: ConfigOptions::default_singleton(), }; let res = udf .invoke_with_args(args) diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index fd135f4c5ec0..47a70a5b2c67 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -257,6 +257,7 @@ mod tests { use arrow::array::{Float32Array, Float64Array, Int64Array}; use arrow::compute::SortOptions; use datafusion_common::cast::{as_float32_array, as_float64_array}; + use datafusion_common::config::ConfigOptions; use datafusion_common::DFSchema; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; @@ -273,6 +274,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let _ = LogFunc::new().invoke_with_args(args); } @@ -285,6 +287,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new().invoke_with_args(args); @@ -299,6 +302,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Float32, 
+ config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -326,6 +330,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -354,6 +359,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Float32, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -382,6 +388,7 @@ mod tests { ], number_rows: 1, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -411,6 +418,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -443,6 +451,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Float32, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -478,6 +487,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -513,6 +523,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Float32, + config_options: ConfigOptions::default_singleton(), }; let result = LogFunc::new() .invoke_with_args(args) @@ -538,9 +549,10 @@ mod tests { // Test log() simplification errors fn test_log_simplify_errors() { let props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let schema = Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new()).unwrap()); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::new(&props, config_options).with_schema(schema); // Expect 0 args to error let _ = LogFunc::new().simplify(vec![], &context).unwrap_err(); // Expect 3 args 
to error @@ -553,9 +565,10 @@ mod tests { // Test that non-simplifiable log() expressions are unchanged after simplification fn test_log_simplify_original() { let props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let schema = Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new()).unwrap()); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::new(&props, config_options).with_schema(schema); // One argument with no simplifications let result = LogFunc::new().simplify(vec![lit(2)], &context).unwrap(); let ExprSimplifyResult::Original(args) = result else { diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index 028ec2fef793..c68a974ea9f2 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -188,6 +188,7 @@ fn is_log(func: &ScalarUDF) -> bool { mod tests { use arrow::array::Float64Array; use datafusion_common::cast::{as_float64_array, as_int64_array}; + use datafusion_common::config::ConfigOptions; use super::*; @@ -204,6 +205,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = PowerFunc::new() .invoke_with_args(args) @@ -234,6 +236,7 @@ mod tests { ], number_rows: 4, return_type: &DataType::Int64, + config_options: ConfigOptions::default_singleton(), }; let result = PowerFunc::new() .invoke_with_args(args) diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index ba5422afa768..b8b816fce810 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -140,6 +140,7 @@ mod test { use arrow::array::{ArrayRef, Float32Array, Float64Array}; use arrow::datatypes::DataType; use datafusion_common::cast::{as_float32_array, as_float64_array}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, 
ScalarFunctionArgs, ScalarUDFImpl}; use crate::math::signum::SignumFunc; @@ -161,6 +162,7 @@ mod test { args: vec![ColumnarValue::Array(Arc::clone(&array) as ArrayRef)], number_rows: array.len(), return_type: &DataType::Float32, + config_options: ConfigOptions::default_singleton(), }; let result = SignumFunc::new() .invoke_with_args(args) @@ -205,6 +207,7 @@ mod test { args: vec![ColumnarValue::Array(Arc::clone(&array) as ArrayRef)], number_rows: array.len(), return_type: &DataType::Float64, + config_options: ConfigOptions::default_singleton(), }; let result = SignumFunc::new() .invoke_with_args(args) diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index 8cb1a4ff3d60..c48714f0bfec 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -619,6 +619,7 @@ fn count_matches( mod tests { use super::*; use arrow::array::{GenericStringArray, StringViewArray}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::ScalarFunctionArgs; #[test] @@ -651,6 +652,7 @@ mod tests { let values = ["", "aabca", "abcabc", "abcAbcab", "abcabcabc"]; let regex = "abc"; let expected: Vec = vec![0, 1, 2, 1, 3]; + let config_options = ConfigOptions::default_singleton(); values.iter().enumerate().for_each(|(pos, &v)| { // utf8 @@ -661,6 +663,7 @@ mod tests { args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)], number_rows: 2, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -676,6 +679,7 @@ mod tests { args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)], number_rows: 2, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -691,6 +695,7 @@ mod tests { args: vec![ColumnarValue::Scalar(v_sv), ColumnarValue::Scalar(regex_sv)], number_rows: 2, return_type: &Int64, + config_options, }); match re { 
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -706,6 +711,7 @@ mod tests { let regex = "abc"; let start = 2; let expected: Vec = vec![0, 1, 1, 0, 2]; + let config_options = ConfigOptions::default_singleton(); values.iter().enumerate().for_each(|(pos, &v)| { // utf8 @@ -721,6 +727,7 @@ mod tests { ], number_rows: 3, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -740,6 +747,7 @@ mod tests { ], number_rows: 3, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -759,6 +767,7 @@ mod tests { ], number_rows: 3, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -775,6 +784,7 @@ mod tests { let start = 1; let flags = "i"; let expected: Vec = vec![0, 1, 2, 2, 3]; + let config_options = ConfigOptions::default_singleton(); values.iter().enumerate().for_each(|(pos, &v)| { // utf8 @@ -792,6 +802,7 @@ mod tests { ], number_rows: 4, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -813,6 +824,7 @@ mod tests { ], number_rows: 4, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -834,6 +846,7 @@ mod tests { ], number_rows: 4, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -899,6 +912,7 @@ mod tests { let start = 5; let flags = ["", "i", "", "", "i"]; let expected: Vec = vec![0, 0, 0, 1, 1]; + let config_options = ConfigOptions::default_singleton(); values.iter().enumerate().for_each(|(pos, &v)| { // utf8 @@ -916,6 +930,7 @@ mod tests { ], number_rows: 4, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -937,6 +952,7 @@ mod tests { ], number_rows: 4, return_type: &Int64, + config_options, }); match re { 
Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { @@ -958,6 +974,7 @@ mod tests { ], number_rows: 4, return_type: &Int64, + config_options, }); match re { Ok(ColumnarValue::Scalar(ScalarValue::Int64(v))) => { diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index c47d08d579e4..4d916223e3e5 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -376,6 +376,7 @@ mod tests { use crate::utils::test::test_function; use arrow::array::{Array, LargeStringArray, StringViewArray}; use arrow::array::{ArrayRef, StringArray}; + use datafusion_common::config::ConfigOptions; use DataType::*; #[test] @@ -473,6 +474,7 @@ mod tests { args: vec![c0, c1, c2, c3, c4], number_rows: 3, return_type: &Utf8, + config_options: ConfigOptions::default_singleton(), }; let result = ConcatFunc::new().invoke_with_args(args)?; diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index c2bad206db15..18db2c76f400 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -407,6 +407,7 @@ mod tests { use arrow::datatypes::DataType::Utf8; use crate::string::concat_ws::ConcatWsFunc; + use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; @@ -485,6 +486,7 @@ mod tests { args: vec![c0, c1, c2], number_rows: 3, return_type: &Utf8, + config_options: ConfigOptions::default_singleton(), }; let result = ConcatWsFunc::new().invoke_with_args(args)?; @@ -515,6 +517,7 @@ mod tests { args: vec![c0, c1, c2], number_rows: 3, return_type: &Utf8, + config_options: ConfigOptions::default_singleton(), }; let result = ConcatWsFunc::new().invoke_with_args(args)?; diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 
05a3edf61c5a..0ad356b6a73b 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -152,6 +152,7 @@ mod test { use super::ContainsFunc; use arrow::array::{BooleanArray, StringArray}; use arrow::datatypes::DataType; + use datafusion_common::config::ConfigOptions; use datafusion_common::ScalarValue; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; use std::sync::Arc; @@ -169,6 +170,7 @@ mod test { args: vec![array, scalar], number_rows: 2, return_type: &DataType::Boolean, + config_options: ConfigOptions::default_singleton(), }; let actual = udf.invoke_with_args(args).unwrap(); diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 226275b13999..cce5b27c9f8a 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -98,6 +98,7 @@ impl ScalarUDFImpl for LowerFunc { mod tests { use super::*; use arrow::array::{Array, ArrayRef, StringArray}; + use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn to_lower(input: ArrayRef, expected: ArrayRef) -> Result<()> { @@ -107,6 +108,7 @@ mod tests { number_rows: input.len(), args: vec![ColumnarValue::Array(input)], return_type: &DataType::Utf8, + config_options: ConfigOptions::default_singleton(), }; let result = match func.invoke_with_args(args)? 
{ diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 2fec7305d183..ea51c1e54e8f 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -97,6 +97,7 @@ impl ScalarUDFImpl for UpperFunc { mod tests { use super::*; use arrow::array::{Array, ArrayRef, StringArray}; + use datafusion_common::config::ConfigOptions; use std::sync::Arc; fn to_upper(input: ArrayRef, expected: ArrayRef) -> Result<()> { @@ -106,6 +107,7 @@ mod tests { number_rows: input.len(), args: vec![ColumnarValue::Array(input)], return_type: &DataType::Utf8, + config_options: ConfigOptions::default_singleton(), }; let result = match func.invoke_with_args(args)? { diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index c4a9f067e9f4..952f3879da4e 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -457,6 +457,8 @@ mod tests { ($test_name:ident, $args:expr, $expected:expr) => { #[test] fn $test_name() -> Result<()> { + use datafusion_common::config::ConfigOptions; + let fis = crate::unicode::find_in_set(); let args = $args; @@ -475,6 +477,7 @@ mod tests { args, number_rows: cardinality, return_type: &return_type, + config_options: ConfigOptions::default_singleton(), }); assert!(result.is_ok()); diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs index 47f3121ba2ce..d482443c8831 100644 --- a/datafusion/functions/src/utils.rs +++ b/datafusion/functions/src/utils.rs @@ -158,6 +158,7 @@ pub mod test { scalar_arguments: &scalar_arguments_refs, nullables: &nullables }); + let config_options = datafusion_common::config::ConfigOptions::default_singleton(); match expected { Ok(expected) => { @@ -165,7 +166,7 @@ pub mod test { let (return_type, _nullable) = return_info.unwrap().into_parts(); assert_eq!(return_type, $EXPECTED_DATA_TYPE); - let result = 
func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}); + let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type, config_options}); assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err()); let result = result.unwrap().to_array(cardinality).expect("Failed to convert to array"); @@ -189,7 +190,7 @@ pub mod test { let (return_type, _nullable) = return_info.unwrap().into_parts(); // invoke is expected error - cannot use .expect_err() due to Debug not being implemented - match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type}) { + match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type, config_options}) { Ok(_) => assert!(false, "expected error"), Err(error) => { assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace())); diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 71ff863b51a1..c20a41199c38 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use crate::simplify_expressions::ExprSimplifier; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; @@ -44,7 +45,7 @@ use datafusion_physical_expr::execution_props::ExecutionProps; /// 'Aggregate' of the subquery if they are missing, so that they can be /// evaluated by the parent operator as the join condition. 
#[derive(Debug)] -pub struct PullUpCorrelatedExpr { +pub struct PullUpCorrelatedExpr<'a> { pub join_filters: Vec, /// mapping from the plan to its holding correlated columns pub correlated_subquery_cols_map: HashMap>, @@ -71,16 +72,12 @@ pub struct PullUpCorrelatedExpr { pub collected_count_expr_map: HashMap, /// pull up having expr, which must be evaluated after the Join pub pull_up_having_expr: Option, + /// config options + pub config_options: &'a Arc, } -impl Default for PullUpCorrelatedExpr { - fn default() -> Self { - Self::new() - } -} - -impl PullUpCorrelatedExpr { - pub fn new() -> Self { +impl<'a> PullUpCorrelatedExpr<'a> { + pub fn new(config_options: &'a Arc) -> Self { Self { join_filters: vec![], correlated_subquery_cols_map: HashMap::new(), @@ -91,6 +88,7 @@ impl PullUpCorrelatedExpr { need_handle_count_bug: false, collected_count_expr_map: HashMap::new(), pull_up_having_expr: None, + config_options, } } @@ -113,6 +111,12 @@ impl PullUpCorrelatedExpr { self.exists_sub_query = exists_sub_query; self } + + /// Set the config options + pub fn with_config_options(mut self, config_options: &'a Arc) -> Self { + self.config_options = config_options; + self + } } /// Used to indicate the unmatched rows from the inner(subquery) table after the left out Join @@ -126,7 +130,7 @@ pub const UN_MATCHED_ROW_INDICATOR: &str = "__always_true"; /// 'ScalarValue(2)') pub type ExprResultMap = HashMap; -impl TreeNodeRewriter for PullUpCorrelatedExpr { +impl TreeNodeRewriter for PullUpCorrelatedExpr<'_> { type Node = LogicalPlan; fn f_down(&mut self, plan: LogicalPlan) -> Result> { @@ -196,6 +200,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { Arc::clone(plan_filter.input.schema()), expr_result_map, &mut expr_result_map_for_count_bug, + self.config_options, )? 
} else { None @@ -254,6 +259,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { projection.input.schema(), expr_result_map, &mut expr_result_map_for_count_bug, + self.config_options, )?; if !expr_result_map_for_count_bug.is_empty() { // has count bug @@ -305,6 +311,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { &aggregate.aggr_expr, aggregate.input.schema(), &mut expr_result_map_for_count_bug, + self.config_options, )?; if !expr_result_map_for_count_bug.is_empty() { // has count bug @@ -375,7 +382,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { } } -impl PullUpCorrelatedExpr { +impl PullUpCorrelatedExpr<'_> { fn collect_missing_exprs( &self, exprs: &[Expr], @@ -477,6 +484,7 @@ fn agg_exprs_evaluation_result_on_empty_batch( agg_expr: &[Expr], schema: &DFSchemaRef, expr_result_map_for_count_bug: &mut ExprResultMap, + config_options: &Arc, ) -> Result<()> { for e in agg_expr.iter() { let result_expr = e @@ -498,7 +506,8 @@ fn agg_exprs_evaluation_result_on_empty_batch( let result_expr = result_expr.unalias(); let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(Arc::clone(schema)); + let info = + SimplifyContext::new(&props, config_options).with_schema(Arc::clone(schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; if matches!(result_expr, Expr::Literal(ScalarValue::Int64(_))) { @@ -514,6 +523,7 @@ fn proj_exprs_evaluation_result_on_empty_batch( schema: &DFSchemaRef, input_expr_result_map_for_count_bug: &ExprResultMap, expr_result_map_for_count_bug: &mut ExprResultMap, + config_options: &Arc, ) -> Result<()> { for expr in proj_expr.iter() { let result_expr = expr @@ -535,7 +545,8 @@ fn proj_exprs_evaluation_result_on_empty_batch( if result_expr.ne(expr) { let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(Arc::clone(schema)); + let info = SimplifyContext::new(&props, config_options) + .with_schema(Arc::clone(schema)); let 
simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; let expr_name = match expr { @@ -558,6 +569,7 @@ fn filter_exprs_evaluation_result_on_empty_batch( schema: DFSchemaRef, input_expr_result_map_for_count_bug: &ExprResultMap, expr_result_map_for_count_bug: &mut ExprResultMap, + config_options: &Arc, ) -> Result> { let result_expr = filter_expr .clone() @@ -576,7 +588,7 @@ fn filter_exprs_evaluation_result_on_empty_batch( let pull_up_expr = if result_expr.ne(filter_expr) { let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(schema); + let info = SimplifyContext::new(&props, config_options).with_schema(schema); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; match &result_expr { diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index c18c48251daa..351a8d7571c6 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -26,6 +26,7 @@ use crate::utils::replace_qualified_name; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::alias::AliasGenerator; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{internal_err, plan_err, Column, Result}; use datafusion_expr::expr::{Exists, InSubquery}; @@ -91,8 +92,12 @@ impl OptimizerRule for DecorrelatePredicateSubquery { match extract_subquery_info(subquery_expr) { // The subquery expression is at the top level of the filter SubqueryPredicate::Top(subquery) => { - match build_join_top(&subquery, &cur_input, config.alias_generator())? - { + match build_join_top( + &subquery, + &cur_input, + config.alias_generator(), + config.options(), + )? 
{ Some(plan) => cur_input = plan, // If the subquery can not be converted to a Join, reconstruct the subquery expression and add it to the Filter None => other_exprs.push(subquery.expr()), @@ -136,7 +141,14 @@ fn rewrite_inner_subqueries( Expr::Exists(Exists { subquery: Subquery { subquery, .. }, negated, - }) => match mark_join(&cur_input, Arc::clone(&subquery), None, negated, alias)? { + }) => match mark_join( + &cur_input, + Arc::clone(&subquery), + None, + negated, + alias, + config.options(), + )? { Some((plan, exists_expr)) => { cur_input = plan; Ok(Transformed::yes(exists_expr)) @@ -160,6 +172,7 @@ fn rewrite_inner_subqueries( Some(in_predicate), negated, alias, + config.options(), )? { Some((plan, exists_expr)) => { cur_input = plan; @@ -254,6 +267,7 @@ fn build_join_top( query_info: &SubqueryInfo, left: &LogicalPlan, alias: &Arc, + config_options: &Arc, ) -> Result> { let where_in_expr_opt = &query_info.where_in_expr; let in_predicate_opt = where_in_expr_opt @@ -275,7 +289,14 @@ fn build_join_top( }; let subquery = query_info.query.subquery.as_ref(); let subquery_alias = alias.next("__correlated_sq"); - build_join(left, subquery, in_predicate_opt, join_type, subquery_alias) + build_join( + left, + subquery, + in_predicate_opt, + join_type, + subquery_alias, + config_options, + ) } /// This is used to handle the case when the subquery is embedded in a more complex boolean @@ -299,16 +320,22 @@ fn mark_join( in_predicate_opt: Option, negated: bool, alias_generator: &Arc, + config_options: &Arc, ) -> Result> { let alias = alias_generator.next("__correlated_sq"); let exists_col = Expr::Column(Column::new(Some(alias.clone()), "mark")); let exists_expr = if negated { !exists_col } else { exists_col }; - Ok( - build_join(left, &subquery, in_predicate_opt, JoinType::LeftMark, alias)? - .map(|plan| (plan, exists_expr)), - ) + Ok(build_join( + left, + &subquery, + in_predicate_opt, + JoinType::LeftMark, + alias, + config_options, + )? 
+ .map(|plan| (plan, exists_expr))) } fn build_join( @@ -317,8 +344,9 @@ fn build_join( in_predicate_opt: Option, join_type: JoinType, alias: String, + config_options: &Arc, ) -> Result> { - let mut pull_up = PullUpCorrelatedExpr::new() + let mut pull_up = PullUpCorrelatedExpr::new(config_options) .with_in_predicate_opt(in_predicate_opt.clone()) .with_exists_sub_query(in_predicate_opt.is_none()); diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 3a69bd91e749..24fd4f92172d 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -18,6 +18,7 @@ //! [`Optimizer`] and [`OptimizerRule`] use std::fmt::Debug; +use std::ops::Deref; use std::sync::Arc; use chrono::{DateTime, Utc}; @@ -106,7 +107,7 @@ pub trait OptimizerConfig { /// Return alias generator used to generate unique aliases for subqueries fn alias_generator(&self) -> &Arc; - fn options(&self) -> &ConfigOptions; + fn options(&self) -> &Arc; fn function_registry(&self) -> Option<&dyn FunctionRegistry> { None @@ -124,7 +125,8 @@ pub struct OptimizerContext { /// Alias generator used to generate unique aliases for subqueries alias_generator: Arc, - options: ConfigOptions, + /// configuration options + config_options: Arc, } impl OptimizerContext { @@ -136,13 +138,15 @@ impl OptimizerContext { Self { query_execution_start_time: Utc::now(), alias_generator: Arc::new(AliasGenerator::new()), - options, + config_options: Arc::new(options), } } /// Specify whether to enable the filter_null_keys rule pub fn filter_null_keys(mut self, filter_null_keys: bool) -> Self { - self.options.optimizer.filter_null_join_keys = filter_null_keys; + let mut config_options = self.config_options.deref().clone(); + config_options.optimizer.filter_null_join_keys = filter_null_keys; + self.config_options = Arc::new(config_options); self } @@ -159,13 +163,17 @@ impl OptimizerContext { /// Specify whether the optimizer should skip rules that produce /// 
errors, or fail the query pub fn with_skip_failing_rules(mut self, b: bool) -> Self { - self.options.optimizer.skip_failed_rules = b; + let mut config_options = self.config_options.deref().clone(); + config_options.optimizer.skip_failed_rules = b; + self.config_options = Arc::new(config_options); self } /// Specify how many times to attempt to optimize the plan pub fn with_max_passes(mut self, v: u8) -> Self { - self.options.optimizer.max_passes = v as usize; + let mut config_options = self.config_options.deref().clone(); + config_options.optimizer.max_passes = v as usize; + self.config_options = Arc::new(config_options); self } } @@ -186,8 +194,8 @@ impl OptimizerConfig for OptimizerContext { &self.alias_generator } - fn options(&self) -> &ConfigOptions { - &self.options + fn options(&self) -> &Arc { + &self.config_options } } diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 0dbb78a2680e..aeb680df8363 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use indexmap::IndexSet; use itertools::Itertools; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, }; @@ -525,6 +526,7 @@ fn push_down_all_join( fn push_down_join( join: Join, parent_predicate: Option<&Expr>, + config_options: &Arc, ) -> Result> { // Split the parent predicate into individual conjunctive parts. let predicates = parent_predicate @@ -538,7 +540,7 @@ fn push_down_join( // Are there any new join predicates that can be inferred from the filter expressions? 
let inferred_join_predicates = - infer_join_predicates(&join, &predicates, &on_filters)?; + infer_join_predicates(&join, &predicates, &on_filters, config_options)?; if on_filters.is_empty() && predicates.is_empty() @@ -564,6 +566,7 @@ fn infer_join_predicates( join: &Join, predicates: &[Expr], on_filters: &[Expr], + config_options: &Arc, ) -> Result> { // Only allow both side key is column. let join_col_keys = join @@ -584,6 +587,7 @@ fn infer_join_predicates( &join_col_keys, predicates, &mut inferred_predicates, + config_options, )?; infer_join_predicates_from_on_filters( @@ -591,6 +595,7 @@ fn infer_join_predicates( join_type, on_filters, &mut inferred_predicates, + config_options, )?; Ok(inferred_predicates.predicates) @@ -621,12 +626,14 @@ impl InferredPredicates { &mut self, predicate: Expr, replace_map: &HashMap<&Column, &Column>, + config_options: &Arc, ) -> Result<()> { if self.is_inner_join || matches!( is_restrict_null_predicate( predicate.clone(), - replace_map.keys().cloned() + replace_map.keys().cloned(), + config_options, ), Ok(true) ) @@ -651,11 +658,13 @@ fn infer_join_predicates_from_predicates( join_col_keys: &[(&Column, &Column)], predicates: &[Expr], inferred_predicates: &mut InferredPredicates, + config_options: &Arc, ) -> Result<()> { infer_join_predicates_impl::( join_col_keys, predicates, inferred_predicates, + config_options, ) } @@ -676,6 +685,7 @@ fn infer_join_predicates_from_on_filters( join_type: JoinType, on_filters: &[Expr], inferred_predicates: &mut InferredPredicates, + config_options: &Arc, ) -> Result<()> { match join_type { JoinType::Full | JoinType::LeftAnti | JoinType::RightAnti => Ok(()), @@ -683,12 +693,14 @@ fn infer_join_predicates_from_on_filters( join_col_keys, on_filters, inferred_predicates, + config_options, ), JoinType::Left | JoinType::LeftSemi | JoinType::LeftMark => { infer_join_predicates_impl::( join_col_keys, on_filters, inferred_predicates, + config_options, ) } JoinType::Right | JoinType::RightSemi => { @@ 
-696,6 +708,7 @@ fn infer_join_predicates_from_on_filters( join_col_keys, on_filters, inferred_predicates, + config_options, ) } } @@ -724,6 +737,7 @@ fn infer_join_predicates_impl< join_col_keys: &[(&Column, &Column)], input_predicates: &[Expr], inferred_predicates: &mut InferredPredicates, + config_options: &Arc, ) -> Result<()> { for predicate in input_predicates { let mut join_cols_to_replace = HashMap::new(); @@ -744,8 +758,11 @@ fn infer_join_predicates_impl< continue; } - inferred_predicates - .try_build_predicate(predicate.clone(), &join_cols_to_replace)?; + inferred_predicates.try_build_predicate( + predicate.clone(), + &join_cols_to_replace, + config_options, + )?; } Ok(()) } @@ -766,10 +783,10 @@ impl OptimizerRule for PushDownFilter { fn rewrite( &self, plan: LogicalPlan, - _config: &dyn OptimizerConfig, + config: &dyn OptimizerConfig, ) -> Result> { if let LogicalPlan::Join(join) = plan { - return push_down_join(join, None); + return push_down_join(join, None, config.options()); }; let plan_schema = Arc::clone(plan.schema()); @@ -799,7 +816,7 @@ impl OptimizerRule for PushDownFilter { new_predicate, child_filter.input, )?); - self.rewrite(new_filter, _config) + self.rewrite(new_filter, config) } LogicalPlan::Repartition(repartition) => { let new_filter = @@ -1081,7 +1098,9 @@ impl OptimizerRule for PushDownFilter { } }) } - LogicalPlan::Join(join) => push_down_join(join, Some(&filter.predicate)), + LogicalPlan::Join(join) => { + push_down_join(join, Some(&filter.predicate), config.options()) + } LogicalPlan::TableScan(scan) => { let filter_predicates = split_conjunction(&filter.predicate); diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 3a8aef267be5..3e5a281a6cf2 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -26,6 +26,7 @@ use crate::utils::replace_qualified_name; use crate::{OptimizerConfig, 
OptimizerRule}; use datafusion_common::alias::AliasGenerator; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{ Transformed, TransformedResult, TreeNode, TreeNodeRecursion, TreeNodeRewriter, }; @@ -99,7 +100,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut cur_input = filter.input.as_ref().clone(); for (subquery, alias) in subqueries { if let Some((optimized_subquery, expr_check_map)) = - build_join(&subquery, &cur_input, &alias)? + build_join(&subquery, &cur_input, &alias, config.options())? { if !expr_check_map.is_empty() { rewrite_expr = rewrite_expr @@ -153,7 +154,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut cur_input = projection.input.as_ref().clone(); for (subquery, alias) in all_subqueries { if let Some((optimized_subquery, expr_check_map)) = - build_join(&subquery, &cur_input, &alias)? + build_join(&subquery, &cur_input, &alias, config.options())? { cur_input = optimized_subquery; if !expr_check_map.is_empty() { @@ -295,9 +296,11 @@ fn build_join( subquery: &Subquery, filter_input: &LogicalPlan, subquery_alias: &str, + config_options: &Arc, ) -> Result)>> { let subquery_plan = subquery.subquery.as_ref(); - let mut pull_up = PullUpCorrelatedExpr::new().with_need_handle_count_bug(true); + let mut pull_up = + PullUpCorrelatedExpr::new(config_options).with_need_handle_count_bug(true); let new_plan = subquery_plan.clone().rewrite(&mut pull_up).data()?; if !pull_up.can_pull_up { return Ok(None); diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index d5a1b84e6aff..457252e95306 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -20,6 +20,7 @@ use std::borrow::Cow; use std::collections::HashSet; use std::ops::Not; +use std::sync::Arc; use arrow::{ array::{new_null_array, AsArray}, @@ -29,6 +30,7 @@ use arrow::{ use 
datafusion_common::{ cast::{as_large_list_array, as_list_array}, + config::ConfigOptions, tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter}, }; use datafusion_common::{internal_err, DFSchema, DataFusionError, Result, ScalarValue}; @@ -71,6 +73,7 @@ use regex::Regex; /// use arrow::datatypes::{Schema, Field, DataType}; /// use datafusion_expr::{col, lit}; /// use datafusion_common::{DataFusionError, ToDFSchema}; +/// use datafusion_common::config::ConfigOptions; /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; @@ -83,7 +86,8 @@ use regex::Regex; /// /// // Create the simplifier /// let props = ExecutionProps::new(); -/// let context = SimplifyContext::new(&props) +/// let config_options = ConfigOptions::default_singleton_arc(); +/// let context = SimplifyContext::new(&props, config_options) /// .with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// @@ -142,7 +146,9 @@ impl ExprSimplifier { /// `b > 2` /// /// ``` + /// use std::sync::Arc; /// use arrow::datatypes::DataType; + /// use datafusion_common::config::ConfigOptions; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_common::Result; /// use datafusion_expr::execution_props::ExecutionProps; @@ -150,14 +156,14 @@ impl ExprSimplifier { /// use datafusion_expr::simplify::SimplifyInfo; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// use datafusion_common::DFSchema; - /// use std::sync::Arc; /// /// /// Simple implementation that provides `Simplifier` the information it needs /// /// See SimplifyContext for a structure that does this. 
/// #[derive(Default)] - /// struct Info { + /// struct Info<'a> { /// execution_props: ExecutionProps, - /// }; + /// config_options: &'a Arc, + /// } /// /// impl SimplifyInfo for Info { /// fn is_boolean_type(&self, expr: &Expr) -> Result { @@ -169,6 +175,9 @@ impl ExprSimplifier { /// fn execution_props(&self) -> &ExecutionProps { /// &self.execution_props /// } + /// fn config_options(&self) -> &Arc { + /// self.config_options + /// } /// fn get_data_type(&self, expr: &Expr) -> Result { /// Ok(DataType::Int32) /// } @@ -200,7 +209,10 @@ impl ExprSimplifier { /// pub fn simplify_with_cycle_count(&self, mut expr: Expr) -> Result<(Expr, u32)> { let mut simplifier = Simplifier::new(&self.info); - let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?; + let mut const_evaluator = ConstEvaluator::try_new( + self.info.execution_props(), + self.info.config_options(), + )?; let mut shorten_in_list_simplifier = ShortenInListSimplifier::new(); let mut guarantee_rewriter = GuaranteeRewriter::new(&self.guarantees); @@ -255,6 +267,7 @@ impl ExprSimplifier { /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; + /// use datafusion_common::config::ConfigOptions; /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; @@ -268,7 +281,8 @@ impl ExprSimplifier { /// /// // Create the simplifier /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) + /// let config_options = ConfigOptions::default_singleton_arc(); + /// let context = SimplifyContext::new(&props, config_options) /// .with_schema(schema); /// /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5) @@ -314,6 +328,7 @@ impl ExprSimplifier { /// use datafusion_expr::{col, lit, Expr}; /// use 
datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; + /// use datafusion_common::config::ConfigOptions; /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; @@ -327,7 +342,8 @@ impl ExprSimplifier { /// /// // Create the simplifier /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) + /// let config_options = ConfigOptions::default_singleton_arc(); + /// let context = SimplifyContext::new(&props, config_options) /// .with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// @@ -373,6 +389,7 @@ impl ExprSimplifier { /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; + /// use datafusion_common::config::ConfigOptions; /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; @@ -384,7 +401,8 @@ impl ExprSimplifier { /// /// // Create the simplifier /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) + /// let config_options = ConfigOptions::default_singleton_arc(); + /// let context = SimplifyContext::new(&props, config_options) /// .with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// @@ -481,6 +499,7 @@ struct ConstEvaluator<'a> { can_evaluate: Vec, execution_props: &'a ExecutionProps, + config_options: &'a Arc, input_schema: DFSchema, input_batch: RecordBatch, } @@ -553,7 +572,10 @@ impl<'a> ConstEvaluator<'a> { /// Create a new `ConstantEvaluator`. Session constants (such as /// the time for `now()` are taken from the passed /// `execution_props`. 
- pub fn try_new(execution_props: &'a ExecutionProps) -> Result { + pub fn try_new( + execution_props: &'a ExecutionProps, + config_options: &'a Arc, + ) -> Result { // The dummy column name is unused and doesn't matter as only // expressions without column references can be evaluated static DUMMY_COL_NAME: &str = "."; @@ -561,11 +583,12 @@ impl<'a> ConstEvaluator<'a> { let input_schema = DFSchema::try_from(schema.clone())?; // Need a single "input" row to produce a single output row let col = new_null_array(&DataType::Null, 1); - let input_batch = RecordBatch::try_new(std::sync::Arc::new(schema), vec![col])?; + let input_batch = RecordBatch::try_new(Arc::new(schema), vec![col])?; Ok(Self { can_evaluate: vec![], execution_props, + config_options, input_schema, input_batch, }) @@ -636,11 +659,15 @@ impl<'a> ConstEvaluator<'a> { return ConstSimplifyResult::NotSimplified(s); } - let phys_expr = - match create_physical_expr(&expr, &self.input_schema, self.execution_props) { - Ok(e) => e, - Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), - }; + let phys_expr = match create_physical_expr( + &expr, + &self.input_schema, + self.execution_props, + self.config_options, + ) { + Ok(e) => e, + Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), + }; let col_val = match phys_expr.evaluate(&self.input_batch) { Ok(v) => v, Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), @@ -2026,8 +2053,10 @@ mod tests { #[test] fn api_basic() { let props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + let config_options = ConfigOptions::default_singleton_arc(); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&props, config_options).with_schema(test_schema()), + ); let expr = lit(1) + lit(2); let expected = lit(3); @@ -2038,8 +2067,9 @@ mod tests { fn basic_coercion() { let schema = test_schema(); let props = ExecutionProps::new(); + 
let config_options = ConfigOptions::default_singleton_arc(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&props).with_schema(Arc::clone(&schema)), + SimplifyContext::new(&props, config_options).with_schema(Arc::clone(&schema)), ); // Note expr type is int32 (not int64) @@ -2065,8 +2095,10 @@ mod tests { #[test] fn simplify_and_constant_prop() { let props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + let config_options = ConfigOptions::default_singleton_arc(); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&props, config_options).with_schema(test_schema()), + ); // should be able to simplify to false // (i * (1 - 2)) > 0 @@ -2078,8 +2110,10 @@ mod tests { #[test] fn simplify_and_constant_prop_with_case() { let props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + let config_options = ConfigOptions::default_singleton_arc(); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&props, config_options).with_schema(test_schema()), + ); // CASE // WHEN i>5 AND false THEN i > 5 @@ -3179,8 +3213,9 @@ mod tests { fn try_simplify(expr: Expr) -> Result { let schema = expr_test_schema(); let execution_props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), + SimplifyContext::new(&execution_props, config_options).with_schema(schema), ); simplifier.simplify(expr) } @@ -3192,8 +3227,9 @@ mod tests { fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> { let schema = expr_test_schema(); let execution_props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), + SimplifyContext::new(&execution_props, 
config_options).with_schema(schema), ); simplifier.simplify_with_cycle_count(expr) } @@ -3208,8 +3244,9 @@ mod tests { ) -> Expr { let schema = expr_test_schema(); let execution_props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), + SimplifyContext::new(&execution_props, config_options).with_schema(schema), ) .with_guarantees(guarantees); simplifier.simplify(expr).unwrap() @@ -4089,9 +4126,11 @@ mod tests { #[test] fn simplify_common_factor_conjunction_in_disjunction() { let props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let schema = boolean_test_schema(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema)); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&props, config_options).with_schema(schema), + ); let a = || col("A"); let b = || col("B"); diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 709d8f79c3d9..6b987bfb9da4 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -19,6 +19,7 @@ use std::sync::Arc; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result}; use datafusion_expr::execution_props::ExecutionProps; @@ -69,7 +70,7 @@ impl OptimizerRule for SimplifyExpressions { ) -> Result, DataFusionError> { let mut execution_props = ExecutionProps::new(); execution_props.query_execution_start_time = config.query_execution_start_time(); - Self::optimize_internal(plan, &execution_props) + Self::optimize_internal(plan, &execution_props, config.options()) } } @@ -77,6 +78,7 @@ impl SimplifyExpressions { fn 
optimize_internal( plan: LogicalPlan, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result> { let schema = if !plan.inputs().is_empty() { DFSchemaRef::new(merge_schema(&plan.inputs())) @@ -99,7 +101,8 @@ impl SimplifyExpressions { Arc::new(DFSchema::empty()) }; - let info = SimplifyContext::new(execution_props).with_schema(schema); + let info = + SimplifyContext::new(execution_props, config_options).with_schema(schema); // Inputs have already been rewritten (due to bottom-up traversal handled by Optimizer) // Just need to rewrite our own expressions diff --git a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs index 7670bdf98bb4..ca8f8913e62b 100644 --- a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs +++ b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs @@ -449,6 +449,7 @@ mod tests { use crate::simplify_expressions::ExprSimplifier; use arrow::compute::{cast_with_options, CastOptions}; use arrow::datatypes::Field; + use datafusion_common::config::ConfigOptions; use datafusion_common::{DFSchema, DFSchemaRef}; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; @@ -760,7 +761,8 @@ mod tests { fn optimize_test(expr: Expr, schema: &DFSchemaRef) -> Expr { let props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&props).with_schema(Arc::clone(schema)), + SimplifyContext::new(&props, ConfigOptions::default_singleton_arc()) + .with_schema(Arc::clone(schema)), ); simplifier.simplify(expr).unwrap() diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index c734d908f6d6..d474c234fab6 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -23,6 +23,7 @@ use crate::analyzer::type_coercion::TypeCoercionRewriter; use arrow::array::{new_null_array, Array, RecordBatch}; use arrow::datatypes::{DataType, Field, 
Schema}; use datafusion_common::cast::as_boolean_array; +use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TransformedResult, TreeNode}; use datafusion_common::{Column, DFSchema, Result, ScalarValue}; use datafusion_expr::execution_props::ExecutionProps; @@ -74,6 +75,7 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) { pub fn is_restrict_null_predicate<'a>( predicate: Expr, join_cols_of_predicate: impl IntoIterator, + config_options: &Arc, ) -> Result { if matches!(predicate, Expr::Column(_)) { return Ok(true); @@ -94,8 +96,12 @@ pub fn is_restrict_null_predicate<'a>( let replaced_predicate = replace_col(predicate, &join_cols_to_replace)?; let coerced_predicate = coerce(replaced_predicate, &input_schema)?; - let phys_expr = - create_physical_expr(&coerced_predicate, &input_schema, &execution_props)?; + let phys_expr = create_physical_expr( + &coerced_predicate, + &input_schema, + &execution_props, + config_options, + )?; let result_type = phys_expr.data_type(&schema)?; if !matches!(&result_type, DataType::Boolean) { @@ -217,8 +223,12 @@ mod tests { let column_a = Column::from_name("a"); for (predicate, expected) in test_cases { let join_cols_of_predicate = std::iter::once(&column_a); - let actual = - is_restrict_null_predicate(predicate.clone(), join_cols_of_predicate)?; + let config_options = ConfigOptions::default_singleton_arc(); + let actual = is_restrict_null_predicate( + predicate.clone(), + join_cols_of_predicate, + config_options, + )?; assert_eq!(actual, expected, "{}", predicate); } diff --git a/datafusion/physical-expr/src/analysis.rs b/datafusion/physical-expr/src/analysis.rs index 5abd50f6d1b4..49a42ab6eb6d 100644 --- a/datafusion/physical-expr/src/analysis.rs +++ b/datafusion/physical-expr/src/analysis.rs @@ -302,6 +302,7 @@ mod tests { use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{assert_contains, DFSchema}; use 
datafusion_expr::{ col, execution_props::ExecutionProps, interval_arithmetic::Interval, lit, Expr, @@ -367,8 +368,13 @@ mod tests { for (expr, lower, upper) in test_cases { let boundaries = ExprBoundaries::try_new_unbounded(&schema).unwrap(); let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); - let physical_expr = - create_physical_expr(&expr, &df_schema, &ExecutionProps::new()).unwrap(); + let physical_expr = create_physical_expr( + &expr, + &df_schema, + &ExecutionProps::new(), + ConfigOptions::default_singleton_arc(), + ) + .unwrap(); let analysis_result = analyze( &physical_expr, AnalysisContext::new(boundaries), @@ -402,12 +408,18 @@ mod tests { .and(col("a").gt(lit(20))) .and(col("a").lt(lit(30))), ]; + let config_options = ConfigOptions::default_singleton_arc(); for expr in test_cases { let boundaries = ExprBoundaries::try_new_unbounded(&schema).unwrap(); let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); - let physical_expr = - create_physical_expr(&expr, &df_schema, &ExecutionProps::new()).unwrap(); + let physical_expr = create_physical_expr( + &expr, + &df_schema, + &ExecutionProps::new(), + config_options, + ) + .unwrap(); let analysis_result = analyze( &physical_expr, AnalysisContext::new(boundaries), @@ -428,8 +440,13 @@ mod tests { let expected_error = "Interval arithmetic does not support the operator OR"; let boundaries = ExprBoundaries::try_new_unbounded(&schema).unwrap(); let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap(); - let physical_expr = - create_physical_expr(&expr, &df_schema, &ExecutionProps::new()).unwrap(); + let physical_expr = create_physical_expr( + &expr, + &df_schema, + &ExecutionProps::new(), + ConfigOptions::default_singleton_arc(), + ) + .unwrap(); let analysis_error = analyze( &physical_expr, AnalysisContext::new(boundaries), diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs index 0efd46ad912e..dbb85e7513b9 100644 
--- a/datafusion/physical-expr/src/equivalence/ordering.rs +++ b/datafusion/physical-expr/src/equivalence/ordering.rs @@ -359,6 +359,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_expr::{Operator, ScalarUDF}; use datafusion_physical_expr_common::sort_expr::LexOrdering; @@ -409,21 +410,25 @@ mod tests { let col_e = &col("e", &test_schema)?; let col_f = &col("f", &test_schema)?; let test_fun = Arc::new(ScalarUDF::new_from_impl(TestScalarUDF::new())); + let config_options = ConfigOptions::default_singleton_arc(); let floor_a = Arc::new(ScalarFunctionExpr::try_new( Arc::clone(&test_fun), vec![Arc::clone(col_a)], &test_schema, + config_options, )?) as PhysicalExprRef; let floor_f = Arc::new(ScalarFunctionExpr::try_new( Arc::clone(&test_fun), vec![Arc::clone(col_f)], &test_schema, + config_options, )?) as PhysicalExprRef; let exp_a = Arc::new(ScalarFunctionExpr::try_new( Arc::clone(&test_fun), vec![Arc::clone(col_a)], &test_schema, + config_options, )?) as PhysicalExprRef; let a_plus_b = Arc::new(BinaryExpr::new( diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/equivalence/projection.rs index 035678fbf1f3..10de901e656e 100644 --- a/datafusion/physical-expr/src/equivalence/projection.rs +++ b/datafusion/physical-expr/src/equivalence/projection.rs @@ -148,6 +148,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_expr::{Operator, ScalarUDF}; #[test] @@ -672,6 +673,7 @@ mod tests { test_fun, vec![Arc::clone(col_c)], &schema, + ConfigOptions::default_singleton_arc(), )?) 
as PhysicalExprRef; let option_asc = SortOptions { diff --git a/datafusion/physical-expr/src/equivalence/properties/dependency.rs b/datafusion/physical-expr/src/equivalence/properties/dependency.rs index 9eba295e562e..fbff71645a58 100644 --- a/datafusion/physical-expr/src/equivalence/properties/dependency.rs +++ b/datafusion/physical-expr/src/equivalence/properties/dependency.rs @@ -438,6 +438,7 @@ mod tests { use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; + use datafusion_common::config::ConfigOptions; use datafusion_common::{Constraint, Constraints, Result}; use datafusion_expr::sort_properties::SortProperties; use datafusion_expr::Operator; @@ -1225,6 +1226,7 @@ mod tests { concat(), vec![Arc::clone(&col_a), Arc::clone(&col_b)], DataType::Utf8, + Arc::clone(ConfigOptions::default_singleton_arc()), )); // Assume existing ordering is [c ASC, a ASC, b ASC] @@ -1316,6 +1318,7 @@ mod tests { concat(), vec![Arc::clone(&col_a), Arc::clone(&col_b)], DataType::Utf8, + Arc::clone(ConfigOptions::default_singleton_arc()), )); // Assume existing ordering is [concat(a, b) ASC, a ASC, b ASC] diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index fac83dfc4524..4b94ff610416 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -24,6 +24,7 @@ use crate::{ }; use arrow::datatypes::Schema; +use datafusion_common::config::ConfigOptions; use datafusion_common::{ exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema, }; @@ -51,6 +52,7 @@ use datafusion_expr::{ /// # Example: Create `PhysicalExpr` from `Expr` /// ``` /// # use arrow::datatypes::{DataType, Field, Schema}; +/// # use datafusion_common::config::ConfigOptions; /// # use datafusion_common::DFSchema; /// # use datafusion_expr::{Expr, col, lit}; /// # use datafusion_physical_expr::create_physical_expr; @@ -62,8 +64,10 @@ use datafusion_expr::{ /// let df_schema = 
DFSchema::try_from(schema).unwrap(); /// // 2. ExecutionProps /// let props = ExecutionProps::new(); +/// // 3. ConfigOptions +/// let config_options = ConfigOptions::default_singleton_arc(); /// // We can now create a PhysicalExpr: -/// let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap(); +/// let physical_expr = create_physical_expr(&expr, &df_schema, &props, &config_options).unwrap(); /// ``` /// /// # Example: Executing a PhysicalExpr to obtain [ColumnarValue] @@ -72,6 +76,7 @@ use datafusion_expr::{ /// # use arrow::array::{cast::AsArray, BooleanArray, Int32Array, RecordBatch}; /// # use arrow::datatypes::{DataType, Field, Schema}; /// # use datafusion_common::{assert_batches_eq, DFSchema}; +/// # use datafusion_common::config::ConfigOptions; /// # use datafusion_expr::{Expr, col, lit, ColumnarValue}; /// # use datafusion_physical_expr::create_physical_expr; /// # use datafusion_expr::execution_props::ExecutionProps; @@ -79,8 +84,9 @@ use datafusion_expr::{ /// # let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); /// # let df_schema = DFSchema::try_from(schema.clone()).unwrap(); /// # let props = ExecutionProps::new(); +/// # let config_options = ConfigOptions::default_singleton_arc(); /// // Given a PhysicalExpr, for `a = 1` we can evaluate it against a RecordBatch like this: -/// let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap(); +/// let physical_expr = create_physical_expr(&expr, &df_schema, &props, &config_options).unwrap(); /// // Input of [1,2,3] /// let input_batch = RecordBatch::try_from_iter(vec![ /// ("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as _) @@ -107,13 +113,17 @@ pub fn create_physical_expr( e: &Expr, input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result> { let input_schema: &Schema = &input_dfschema.into(); match e { - Expr::Alias(Alias { expr, .. }) => { - Ok(create_physical_expr(expr, input_dfschema, execution_props)?) 
- } + Expr::Alias(Alias { expr, .. }) => Ok(create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?), Expr::Column(c) => { let idx = input_dfschema.index_of_column(c)?; Ok(Arc::new(Column::new(&c.name, idx))) @@ -144,12 +154,22 @@ pub fn create_physical_expr( Operator::IsNotDistinctFrom, lit(true), ); - create_physical_expr(&binary_op, input_dfschema, execution_props) + create_physical_expr( + &binary_op, + input_dfschema, + execution_props, + config_options, + ) } Expr::IsNotTrue(expr) => { let binary_op = binary_expr(expr.as_ref().clone(), Operator::IsDistinctFrom, lit(true)); - create_physical_expr(&binary_op, input_dfschema, execution_props) + create_physical_expr( + &binary_op, + input_dfschema, + execution_props, + config_options, + ) } Expr::IsFalse(expr) => { let binary_op = binary_expr( @@ -157,12 +177,22 @@ pub fn create_physical_expr( Operator::IsNotDistinctFrom, lit(false), ); - create_physical_expr(&binary_op, input_dfschema, execution_props) + create_physical_expr( + &binary_op, + input_dfschema, + execution_props, + config_options, + ) } Expr::IsNotFalse(expr) => { let binary_op = binary_expr(expr.as_ref().clone(), Operator::IsDistinctFrom, lit(false)); - create_physical_expr(&binary_op, input_dfschema, execution_props) + create_physical_expr( + &binary_op, + input_dfschema, + execution_props, + config_options, + ) } Expr::IsUnknown(expr) => { let binary_op = binary_expr( @@ -170,7 +200,12 @@ pub fn create_physical_expr( Operator::IsNotDistinctFrom, Expr::Literal(ScalarValue::Boolean(None)), ); - create_physical_expr(&binary_op, input_dfschema, execution_props) + create_physical_expr( + &binary_op, + input_dfschema, + execution_props, + config_options, + ) } Expr::IsNotUnknown(expr) => { let binary_op = binary_expr( @@ -178,12 +213,27 @@ pub fn create_physical_expr( Operator::IsDistinctFrom, Expr::Literal(ScalarValue::Boolean(None)), ); - create_physical_expr(&binary_op, input_dfschema, execution_props) + 
create_physical_expr( + &binary_op, + input_dfschema, + execution_props, + config_options, + ) } Expr::BinaryExpr(BinaryExpr { left, op, right }) => { // Create physical expressions for left and right operands - let lhs = create_physical_expr(left, input_dfschema, execution_props)?; - let rhs = create_physical_expr(right, input_dfschema, execution_props)?; + let lhs = create_physical_expr( + left, + input_dfschema, + execution_props, + config_options, + )?; + let rhs = create_physical_expr( + right, + input_dfschema, + execution_props, + config_options, + )?; // Note that the logical planner is responsible // for type coercion on the arguments (e.g. if one // argument was originally Int32 and one was @@ -206,10 +256,18 @@ pub fn create_physical_expr( "LIKE does not support escape_char other than the backslash (\\)" ); } - let physical_expr = - create_physical_expr(expr, input_dfschema, execution_props)?; - let physical_pattern = - create_physical_expr(pattern, input_dfschema, execution_props)?; + let physical_expr = create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?; + let physical_pattern = create_physical_expr( + pattern, + input_dfschema, + execution_props, + config_options, + )?; like( *negated, *case_insensitive, @@ -228,10 +286,18 @@ pub fn create_physical_expr( if escape_char.is_some() { return exec_err!("SIMILAR TO does not support escape_char yet"); } - let physical_expr = - create_physical_expr(expr, input_dfschema, execution_props)?; - let physical_pattern = - create_physical_expr(pattern, input_dfschema, execution_props)?; + let physical_expr = create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?; + let physical_pattern = create_physical_expr( + pattern, + input_dfschema, + execution_props, + config_options, + )?; similar_to(*negated, *case_insensitive, physical_expr, physical_pattern) } Expr::Case(case) => { @@ -240,6 +306,7 @@ pub fn create_physical_expr( e.as_ref(), 
input_dfschema, execution_props, + config_options, )?) } else { None @@ -249,10 +316,18 @@ pub fn create_physical_expr( .iter() .map(|(w, t)| (w.as_ref(), t.as_ref())) .unzip(); - let when_expr = - create_physical_exprs(when_expr, input_dfschema, execution_props)?; - let then_expr = - create_physical_exprs(then_expr, input_dfschema, execution_props)?; + let when_expr = create_physical_exprs( + when_expr, + input_dfschema, + execution_props, + config_options, + )?; + let then_expr = create_physical_exprs( + then_expr, + input_dfschema, + execution_props, + config_options, + )?; let when_then_expr: Vec<(Arc, Arc)> = when_expr .iter() @@ -265,6 +340,7 @@ pub fn create_physical_expr( e.as_ref(), input_dfschema, execution_props, + config_options, )?) } else { None @@ -272,40 +348,50 @@ pub fn create_physical_expr( Ok(expressions::case(expr, when_then_expr, else_expr)?) } Expr::Cast(Cast { expr, data_type }) => expressions::cast( - create_physical_expr(expr, input_dfschema, execution_props)?, + create_physical_expr(expr, input_dfschema, execution_props, config_options)?, input_schema, data_type.clone(), ), Expr::TryCast(TryCast { expr, data_type }) => expressions::try_cast( - create_physical_expr(expr, input_dfschema, execution_props)?, + create_physical_expr(expr, input_dfschema, execution_props, config_options)?, input_schema, data_type.clone(), ), - Expr::Not(expr) => { - expressions::not(create_physical_expr(expr, input_dfschema, execution_props)?) 
- } + Expr::Not(expr) => expressions::not(create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?), Expr::Negative(expr) => expressions::negative( - create_physical_expr(expr, input_dfschema, execution_props)?, + create_physical_expr(expr, input_dfschema, execution_props, config_options)?, input_schema, ), Expr::IsNull(expr) => expressions::is_null(create_physical_expr( expr, input_dfschema, execution_props, + config_options, )?), Expr::IsNotNull(expr) => expressions::is_not_null(create_physical_expr( expr, input_dfschema, execution_props, + config_options, )?), Expr::ScalarFunction(ScalarFunction { func, args }) => { - let physical_args = - create_physical_exprs(args, input_dfschema, execution_props)?; + let physical_args = create_physical_exprs( + args, + input_dfschema, + execution_props, + config_options, + )?; Ok(Arc::new(ScalarFunctionExpr::try_new( Arc::clone(func), physical_args, input_schema, + config_options, )?)) } Expr::Between(Between { @@ -314,9 +400,24 @@ pub fn create_physical_expr( low, high, }) => { - let value_expr = create_physical_expr(expr, input_dfschema, execution_props)?; - let low_expr = create_physical_expr(low, input_dfschema, execution_props)?; - let high_expr = create_physical_expr(high, input_dfschema, execution_props)?; + let value_expr = create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?; + let low_expr = create_physical_expr( + low, + input_dfschema, + execution_props, + config_options, + )?; + let high_expr = create_physical_expr( + high, + input_dfschema, + execution_props, + config_options, + )?; // rewrite the between into the two binary operators let binary_expr = binary( @@ -351,11 +452,19 @@ pub fn create_physical_expr( Ok(expressions::lit(ScalarValue::Boolean(None))) } _ => { - let value_expr = - create_physical_expr(expr, input_dfschema, execution_props)?; + let value_expr = create_physical_expr( + expr, + input_dfschema, + execution_props, + 
config_options, + )?; - let list_exprs = - create_physical_exprs(list, input_dfschema, execution_props)?; + let list_exprs = create_physical_exprs( + list, + input_dfschema, + execution_props, + config_options, + )?; expressions::in_list(value_expr, list_exprs, negated, input_schema) } }, @@ -373,13 +482,16 @@ pub fn create_physical_exprs<'a, I>( exprs: I, input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result>> where I: IntoIterator, { exprs .into_iter() - .map(|expr| create_physical_expr(expr, input_dfschema, execution_props)) + .map(|expr| { + create_physical_expr(expr, input_dfschema, execution_props, config_options) + }) .collect::>>() } @@ -387,7 +499,9 @@ where pub fn logical2physical(expr: &Expr, schema: &Schema) -> Arc { let df_schema = schema.clone().to_dfschema().unwrap(); let execution_props = ExecutionProps::new(); - create_physical_expr(expr, &df_schema, &execution_props).unwrap() + // usages of this are only in tests so this should be acceptable + let config_options = ConfigOptions::default_singleton_arc(); + create_physical_expr(expr, &df_schema, &execution_props, config_options).unwrap() } #[cfg(test)] @@ -405,7 +519,13 @@ mod tests { let schema = Schema::new(vec![Field::new("letter", DataType::Utf8, false)]); let df_schema = DFSchema::try_from_qualified_schema("data", &schema)?; - let p = create_physical_expr(&expr, &df_schema, &ExecutionProps::new())?; + let config_options = ConfigOptions::default_singleton_arc(); + let p = create_physical_expr( + &expr, + &df_schema, + &ExecutionProps::new(), + config_options, + )?; let batch = RecordBatch::try_new( Arc::new(schema), diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index cf8cc6e00c80..bb78893482a0 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -31,7 +31,7 @@ use std::any::Any; use std::fmt::{self, Debug, Formatter}; -use 
std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::sync::Arc; use crate::expressions::Literal; @@ -39,6 +39,7 @@ use crate::PhysicalExpr; use arrow::array::{Array, RecordBatch}; use arrow::datatypes::{DataType, Schema}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; @@ -46,15 +47,17 @@ use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; use datafusion_expr::{ expr_vec_fmt, ColumnarValue, ReturnTypeArgs, ScalarFunctionArgs, ScalarUDF, }; +use datafusion_physical_expr_common::physical_expr::{DynEq, DynHash}; +use itertools::Itertools; /// Physical expression of a scalar function -#[derive(Eq, PartialEq, Hash)] pub struct ScalarFunctionExpr { fun: Arc, name: String, args: Vec>, return_type: DataType, nullable: bool, + config_options: Arc, } impl Debug for ScalarFunctionExpr { @@ -75,6 +78,7 @@ impl ScalarFunctionExpr { fun: Arc, args: Vec>, return_type: DataType, + config_options: Arc, ) -> Self { Self { fun, @@ -82,6 +86,7 @@ impl ScalarFunctionExpr { args, return_type, nullable: true, + config_options, } } @@ -90,6 +95,7 @@ impl ScalarFunctionExpr { fun: Arc, args: Vec>, schema: &Schema, + config_options: &Arc, ) -> Result { let name = fun.name().to_string(); let arg_types = args @@ -125,6 +131,7 @@ impl ScalarFunctionExpr { args, return_type, nullable, + config_options: Arc::clone(config_options), }) } @@ -164,6 +171,47 @@ impl fmt::Display for ScalarFunctionExpr { } } +impl DynEq for ScalarFunctionExpr { + fn dyn_eq(&self, other: &dyn Any) -> bool { + other.downcast_ref::().is_some_and(|o| { + let eq = self.fun.eq(&o.fun); + let eq = eq && self.name.eq(&o.name); + let eq = eq && self.args.eq(&o.args); + let eq = eq && self.return_type.eq(&o.return_type); + let eq = eq && self.nullable.eq(&o.nullable); + let eq = eq + && self + .config_options + .entries() + .iter() + 
.sorted_by(|&l, &r| l.key.cmp(&r.key)) + .zip( + o.config_options + .entries() + .iter() + .sorted_by(|&l, &r| l.key.cmp(&r.key)), + ) + .filter(|(l, r)| l.ne(r)) + .count() + == 0; + + eq + }) + } +} + +impl DynHash for ScalarFunctionExpr { + fn dyn_hash(&self, mut state: &mut dyn Hasher) { + self.type_id().hash(&mut state); + self.fun.hash(&mut state); + self.name.hash(&mut state); + self.args.hash(&mut state); + self.return_type.hash(&mut state); + self.nullable.hash(&mut state); + self.config_options.entries().hash(&mut state); + } +} + impl PhysicalExpr for ScalarFunctionExpr { /// Return a reference to Any that can be used for downcasting fn as_any(&self) -> &dyn Any { @@ -195,6 +243,7 @@ impl PhysicalExpr for ScalarFunctionExpr { args, number_rows: batch.num_rows(), return_type: &self.return_type, + config_options: &self.config_options, })?; if let ColumnarValue::Array(array) = &output { @@ -228,6 +277,7 @@ impl PhysicalExpr for ScalarFunctionExpr { Arc::clone(&self.fun), children, self.return_type().clone(), + Arc::clone(&self.config_options), ) .with_nullable(self.nullable), )) diff --git a/datafusion/proto/src/bytes/mod.rs b/datafusion/proto/src/bytes/mod.rs index da01d89c0c3d..646631823e79 100644 --- a/datafusion/proto/src/bytes/mod.rs +++ b/datafusion/proto/src/bytes/mod.rs @@ -308,7 +308,12 @@ pub fn physical_plan_from_json( let back: protobuf::PhysicalPlanNode = serde_json::from_str(json) .map_err(|e| plan_datafusion_err!("Error serializing plan: {e}"))?; let extension_codec = DefaultPhysicalExtensionCodec {}; - back.try_into_physical_plan(ctx, &ctx.runtime_env(), &extension_codec) + back.try_into_physical_plan( + ctx, + ctx.state().config_options(), + &ctx.runtime_env(), + &extension_codec, + ) } /// Deserialize a PhysicalPlan from bytes @@ -328,5 +333,10 @@ pub fn physical_plan_from_bytes_with_extension_codec( ) -> Result> { let protobuf = protobuf::PhysicalPlanNode::decode(bytes) .map_err(|e| plan_datafusion_err!("Error decoding expr as protobuf: 
{e}"))?; - protobuf.try_into_physical_plan(ctx, &ctx.runtime_env(), extension_codec) + protobuf.try_into_physical_plan( + ctx, + ctx.state().config_options(), + &ctx.runtime_env(), + extension_codec, + ) } diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 6331b7fb3114..90708b3ffc08 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -42,6 +42,7 @@ use datafusion::physical_plan::expressions::{ }; use datafusion::physical_plan::windows::{create_window_expr, schema_add_window_field}; use datafusion::physical_plan::{Partitioning, PhysicalExpr, WindowExpr}; +use datafusion_common::config::ConfigOptions; use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_proto_common::common::proto_error; @@ -70,11 +71,18 @@ impl From<&protobuf::PhysicalColumn> for Column { pub fn parse_physical_sort_expr( proto: &protobuf::PhysicalSortExprNode, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result { if let Some(expr) = &proto.expr { - let expr = parse_physical_expr(expr.as_ref(), registry, input_schema, codec)?; + let expr = parse_physical_expr( + expr.as_ref(), + registry, + config_options, + input_schema, + codec, + )?; let options = SortOptions { descending: !proto.asc, nulls_first: proto.nulls_first, @@ -97,13 +105,20 @@ pub fn parse_physical_sort_expr( pub fn parse_physical_sort_exprs( proto: &[protobuf::PhysicalSortExprNode], registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result { proto .iter() .map(|sort_expr| { - parse_physical_sort_expr(sort_expr, registry, input_schema, codec) + parse_physical_sort_expr( + sort_expr, + registry, + config_options, + input_schema, + codec, + ) }) .collect::>() } @@ -121,16 +136,27 @@ pub fn parse_physical_sort_exprs( 
pub fn parse_physical_window_expr( proto: &protobuf::PhysicalWindowExprNode, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { let window_node_expr = - parse_physical_exprs(&proto.args, registry, input_schema, codec)?; - let partition_by = - parse_physical_exprs(&proto.partition_by, registry, input_schema, codec)?; - - let order_by = - parse_physical_sort_exprs(&proto.order_by, registry, input_schema, codec)?; + parse_physical_exprs(&proto.args, registry, config_options, input_schema, codec)?; + let partition_by = parse_physical_exprs( + &proto.partition_by, + registry, + config_options, + input_schema, + codec, + )?; + + let order_by = parse_physical_sort_exprs( + &proto.order_by, + registry, + config_options, + input_schema, + codec, + )?; let window_frame = proto .window_frame @@ -182,6 +208,7 @@ pub fn parse_physical_window_expr( pub fn parse_physical_exprs<'a, I>( protos: I, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result>> @@ -190,7 +217,7 @@ where { protos .into_iter() - .map(|p| parse_physical_expr(p, registry, input_schema, codec)) + .map(|p| parse_physical_expr(p, registry, config_options, input_schema, codec)) .collect::>>() } @@ -206,6 +233,7 @@ where pub fn parse_physical_expr( proto: &protobuf::PhysicalExprNode, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -225,6 +253,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( binary_expr.l.as_deref(), registry, + config_options, "left", input_schema, codec, @@ -233,6 +262,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( binary_expr.r.as_deref(), registry, + config_options, "right", input_schema, codec, @@ -255,6 +285,7 @@ pub fn parse_physical_expr( Arc::new(IsNullExpr::new(parse_required_physical_expr( 
e.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, @@ -264,6 +295,7 @@ pub fn parse_physical_expr( Arc::new(IsNotNullExpr::new(parse_required_physical_expr( e.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, @@ -272,6 +304,7 @@ pub fn parse_physical_expr( ExprType::NotExpr(e) => Arc::new(NotExpr::new(parse_required_physical_expr( e.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, @@ -280,6 +313,7 @@ pub fn parse_physical_expr( Arc::new(NegativeExpr::new(parse_required_physical_expr( e.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, @@ -289,18 +323,27 @@ pub fn parse_physical_expr( parse_required_physical_expr( e.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, )?, - parse_physical_exprs(&e.list, registry, input_schema, codec)?, + parse_physical_exprs(&e.list, registry, config_options, input_schema, codec)?, &e.negated, input_schema, )?, ExprType::Case(e) => Arc::new(CaseExpr::try_new( e.expr .as_ref() - .map(|e| parse_physical_expr(e.as_ref(), registry, input_schema, codec)) + .map(|e| { + parse_physical_expr( + e.as_ref(), + registry, + config_options, + input_schema, + codec, + ) + }) .transpose()?, e.when_then_expr .iter() @@ -309,6 +352,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( e.when_expr.as_ref(), registry, + config_options, "when_expr", input_schema, codec, @@ -316,6 +360,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( e.then_expr.as_ref(), registry, + config_options, "then_expr", input_schema, codec, @@ -325,13 +370,22 @@ pub fn parse_physical_expr( .collect::>>()?, e.else_expr .as_ref() - .map(|e| parse_physical_expr(e.as_ref(), registry, input_schema, codec)) + .map(|e| { + parse_physical_expr( + e.as_ref(), + registry, + config_options, + input_schema, + codec, + ) + }) .transpose()?, )?), ExprType::Cast(e) => Arc::new(CastExpr::new( parse_required_physical_expr( e.expr.as_deref(), registry, + 
config_options, "expr", input_schema, codec, @@ -343,6 +397,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( e.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, @@ -356,7 +411,13 @@ pub fn parse_physical_expr( }; let scalar_fun_def = Arc::clone(&udf); - let args = parse_physical_exprs(&e.args, registry, input_schema, codec)?; + let args = parse_physical_exprs( + &e.args, + registry, + config_options, + input_schema, + codec, + )?; Arc::new( ScalarFunctionExpr::new( @@ -364,6 +425,7 @@ pub fn parse_physical_expr( scalar_fun_def, args, convert_required!(e.return_type)?, + Arc::new(config_options.clone()), ) .with_nullable(e.nullable), ) @@ -374,6 +436,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( like_expr.expr.as_deref(), registry, + config_options, "expr", input_schema, codec, @@ -381,6 +444,7 @@ pub fn parse_physical_expr( parse_required_physical_expr( like_expr.pattern.as_deref(), registry, + config_options, "pattern", input_schema, codec, @@ -390,7 +454,9 @@ pub fn parse_physical_expr( let inputs: Vec> = extension .inputs .iter() - .map(|e| parse_physical_expr(e, registry, input_schema, codec)) + .map(|e| { + parse_physical_expr(e, registry, config_options, input_schema, codec) + }) .collect::>()?; (codec.try_decode_expr(extension.expr.as_slice(), &inputs)?) as _ } @@ -402,11 +468,12 @@ pub fn parse_physical_expr( fn parse_required_physical_expr( expr: Option<&protobuf::PhysicalExprNode>, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, field: &str, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { - expr.map(|e| parse_physical_expr(e, registry, input_schema, codec)) + expr.map(|e| parse_physical_expr(e, registry, config_options, input_schema, codec)) .transpose()? 
.ok_or_else(|| { DataFusionError::Internal(format!("Missing required field {field:?}")) @@ -416,6 +483,7 @@ fn parse_required_physical_expr( pub fn parse_protobuf_hash_partitioning( partitioning: Option<&protobuf::PhysicalHashRepartition>, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -424,6 +492,7 @@ pub fn parse_protobuf_hash_partitioning( let expr = parse_physical_exprs( &hash_part.hash_expr, registry, + config_options, input_schema, codec, )?; @@ -440,6 +509,7 @@ pub fn parse_protobuf_hash_partitioning( pub fn parse_protobuf_partitioning( partitioning: Option<&protobuf::Partitioning>, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -454,6 +524,7 @@ pub fn parse_protobuf_partitioning( parse_protobuf_hash_partitioning( Some(hash_repartition), registry, + config_options, input_schema, codec, ) @@ -478,6 +549,7 @@ pub fn parse_protobuf_file_scan_schema( pub fn parse_protobuf_file_scan_config( proto: &protobuf::FileScanExecConf, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, codec: &dyn PhysicalExtensionCodec, file_source: Arc, ) -> Result { @@ -531,6 +603,7 @@ pub fn parse_protobuf_file_scan_config( let sort_expr = parse_physical_sort_exprs( &node_collection.physical_sort_expr_nodes, registry, + config_options, &schema, codec, )?; diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 60972ac54ba7..64daa14582b5 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -66,7 +66,7 @@ use datafusion::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use datafusion::physical_plan::{ ExecutionPlan, InputOrderMode, PhysicalExpr, WindowExpr, }; -use datafusion_common::config::TableParquetOptions; +use datafusion_common::config::{ConfigOptions, 
TableParquetOptions}; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; @@ -117,6 +117,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { fn try_into_physical_plan( &self, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -139,6 +140,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input: Arc = into_physical_plan( &projection.input, registry, + config_options, runtime, extension_codec, )?; @@ -151,6 +153,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( expr, registry, + config_options, input.schema().as_ref(), extension_codec, )?, @@ -164,6 +167,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input: Arc = into_physical_plan( &filter.input, registry, + config_options, runtime, extension_codec, )?; @@ -174,6 +178,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( expr, registry, + config_options, input.schema().as_ref(), extension_codec, ) @@ -240,6 +245,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), registry, + config_options, extension_codec, source, )? 
@@ -261,6 +267,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( expr, registry, + config_options, schema.as_ref(), extension_codec, ) @@ -279,6 +286,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let base_config = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), registry, + config_options, extension_codec, Arc::new(source), )?; @@ -294,6 +302,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), registry, + config_options, extension_codec, Arc::new(AvroSource::new()), )?; @@ -306,6 +315,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input: Arc = into_physical_plan( &coalesce_batches.input, registry, + config_options, runtime, extension_codec, )?; @@ -318,20 +328,27 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { )) } PhysicalPlanType::Merge(merge) => { - let input: Arc = - into_physical_plan(&merge.input, registry, runtime, extension_codec)?; + let input: Arc = into_physical_plan( + &merge.input, + registry, + config_options, + runtime, + extension_codec, + )?; Ok(Arc::new(CoalescePartitionsExec::new(input))) } PhysicalPlanType::Repartition(repart) => { let input: Arc = into_physical_plan( &repart.input, registry, + config_options, runtime, extension_codec, )?; let partitioning = parse_protobuf_partitioning( repart.partitioning.as_ref(), registry, + config_options, input.schema().as_ref(), extension_codec, )?; @@ -341,8 +358,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { )?)) } PhysicalPlanType::GlobalLimit(limit) => { - let input: Arc = - into_physical_plan(&limit.input, registry, runtime, extension_codec)?; + let input: Arc = into_physical_plan( + &limit.input, + registry, + config_options, + runtime, + extension_codec, + )?; let fetch = if limit.fetch >= 0 { Some(limit.fetch as usize) } else { @@ -355,14 +377,20 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ))) } 
PhysicalPlanType::LocalLimit(limit) => { - let input: Arc = - into_physical_plan(&limit.input, registry, runtime, extension_codec)?; + let input: Arc = into_physical_plan( + &limit.input, + registry, + config_options, + runtime, + extension_codec, + )?; Ok(Arc::new(LocalLimitExec::new(input, limit.fetch as usize))) } PhysicalPlanType::Window(window_agg) => { let input: Arc = into_physical_plan( &window_agg.input, registry, + config_options, runtime, extension_codec, )?; @@ -375,6 +403,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_window_expr( window_expr, registry, + config_options, input_schema.as_ref(), extension_codec, ) @@ -388,6 +417,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( expr, registry, + config_options, input.schema().as_ref(), extension_codec, ) @@ -427,6 +457,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input: Arc = into_physical_plan( &hash_agg.input, registry, + config_options, runtime, extension_codec, )?; @@ -460,6 +491,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( expr, registry, + config_options, input.schema().as_ref(), extension_codec, ) @@ -475,6 +507,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( expr, registry, + config_options, input.schema().as_ref(), extension_codec, ) @@ -509,6 +542,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_expr( e, registry, + config_options, &physical_schema, extension_codec, ) @@ -529,9 +563,9 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { match expr_type { ExprType::AggregateExpr(agg_node) => { let input_phy_expr: Vec> = agg_node.expr.iter() - .map(|e| parse_physical_expr(e, registry, &physical_schema, extension_codec)).collect::>>()?; + .map(|e| parse_physical_expr(e, registry, config_options, &physical_schema, extension_codec)).collect::>>()?; let ordering_req: LexOrdering = agg_node.ordering_req.iter() - .map(|e| 
parse_physical_sort_expr(e, registry, &physical_schema, extension_codec)) + .map(|e| parse_physical_sort_expr(e, registry, config_options, &physical_schema, extension_codec)) .collect::>()?; agg_node.aggregate_function.as_ref().map(|func| { match func { @@ -584,12 +618,14 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let left: Arc = into_physical_plan( &hashjoin.left, registry, + config_options, runtime, extension_codec, )?; let right: Arc = into_physical_plan( &hashjoin.right, registry, + config_options, runtime, extension_codec, )?; @@ -602,12 +638,14 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let left = parse_physical_expr( &col.left.clone().unwrap(), registry, + config_options, left_schema.as_ref(), extension_codec, )?; let right = parse_physical_expr( &col.right.clone().unwrap(), registry, + config_options, right_schema.as_ref(), extension_codec, )?; @@ -635,7 +673,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + registry, config_options, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -698,12 +736,14 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let left = into_physical_plan( &sym_join.left, registry, + config_options, runtime, extension_codec, )?; let right = into_physical_plan( &sym_join.right, registry, + config_options, runtime, extension_codec, )?; @@ -716,12 +756,14 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let left = parse_physical_expr( &col.left.clone().unwrap(), registry, + config_options, left_schema.as_ref(), extension_codec, )?; let right = parse_physical_expr( &col.right.clone().unwrap(), registry, + config_options, right_schema.as_ref(), extension_codec, )?; @@ -749,7 +791,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + 
registry, config_options, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -775,6 +817,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let left_sort_exprs = parse_physical_sort_exprs( &sym_join.left_sort_exprs, registry, + config_options, &left_schema, extension_codec, )?; @@ -787,6 +830,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let right_sort_exprs = parse_physical_sort_exprs( &sym_join.right_sort_exprs, registry, + config_options, &right_schema, extension_codec, )?; @@ -829,6 +873,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { for input in &union.inputs { inputs.push(input.try_into_physical_plan( registry, + config_options, runtime, extension_codec, )?); @@ -840,6 +885,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { for input in &interleave.inputs { inputs.push(input.try_into_physical_plan( registry, + config_options, runtime, extension_codec, )?); @@ -850,12 +896,14 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let left: Arc = into_physical_plan( &crossjoin.left, registry, + config_options, runtime, extension_codec, )?; let right: Arc = into_physical_plan( &crossjoin.right, registry, + config_options, runtime, extension_codec, )?; @@ -870,8 +918,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { Ok(Arc::new(PlaceholderRowExec::new(schema))) } PhysicalPlanType::Sort(sort) => { - let input: Arc = - into_physical_plan(&sort.input, registry, runtime, extension_codec)?; + let input: Arc = into_physical_plan( + &sort.input, + registry, + config_options, + runtime, + extension_codec, + )?; let exprs = sort .expr .iter() @@ -892,7 +945,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { })? 
.as_ref(); Ok(PhysicalSortExpr { - expr: parse_physical_expr(expr, registry, input.schema().as_ref(), extension_codec)?, + expr: parse_physical_expr(expr, registry, config_options, input.schema().as_ref(), extension_codec)?, options: SortOptions { descending: !sort_expr.asc, nulls_first: sort_expr.nulls_first, @@ -917,8 +970,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { Ok(Arc::new(new_sort)) } PhysicalPlanType::SortPreservingMerge(sort) => { - let input: Arc = - into_physical_plan(&sort.input, registry, runtime, extension_codec)?; + let input: Arc = into_physical_plan( + &sort.input, + registry, + config_options, + runtime, + extension_codec, + )?; let exprs = sort .expr .iter() @@ -939,7 +997,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { })? .as_ref(); Ok(PhysicalSortExpr { - expr: parse_physical_expr(expr, registry, input.schema().as_ref(), extension_codec)?, + expr: parse_physical_expr(expr, registry, config_options, input.schema().as_ref(), extension_codec)?, options: SortOptions { descending: !sort_expr.asc, nulls_first: sort_expr.nulls_first, @@ -965,7 +1023,14 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let inputs: Vec> = extension .inputs .iter() - .map(|i| i.try_into_physical_plan(registry, runtime, extension_codec)) + .map(|i| { + i.try_into_physical_plan( + registry, + config_options, + runtime, + extension_codec, + ) + }) .collect::>()?; let extension_node = extension_codec.try_decode( @@ -977,10 +1042,20 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { Ok(extension_node) } PhysicalPlanType::NestedLoopJoin(join) => { - let left: Arc = - into_physical_plan(&join.left, registry, runtime, extension_codec)?; - let right: Arc = - into_physical_plan(&join.right, registry, runtime, extension_codec)?; + let left: Arc = into_physical_plan( + &join.left, + registry, + config_options, + runtime, + extension_codec, + )?; + let right: Arc = into_physical_plan( + &join.right, + registry, + config_options, + runtime, + 
extension_codec, + )?; let join_type = protobuf::JoinType::try_from(join.join_type).map_err(|_| { proto_error(format!( @@ -1002,7 +1077,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + registry, config_options, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1048,6 +1123,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input: Arc = into_physical_plan( &analyze.input, registry, + config_options, runtime, extension_codec, )?; @@ -1059,8 +1135,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ))) } PhysicalPlanType::JsonSink(sink) => { - let input = - into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan( + &sink.input, + registry, + config_options, + runtime, + extension_codec, + )?; let data_sink: JsonSink = sink .sink @@ -1075,6 +1156,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, registry, + config_options, &sink_schema, extension_codec, ) @@ -1088,8 +1170,13 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { ))) } PhysicalPlanType::CsvSink(sink) => { - let input = - into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan( + &sink.input, + registry, + config_options, + runtime, + extension_codec, + )?; let data_sink: CsvSink = sink .sink @@ -1104,6 +1191,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, registry, + config_options, &sink_schema, extension_codec, ) @@ -1123,6 +1211,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input = into_physical_plan( &sink.input, registry, + config_options, runtime, extension_codec, )?; @@ -1140,6 +1229,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { parse_physical_sort_exprs( 
&collection.physical_sort_expr_nodes, registry, + config_options, &sink_schema, extension_codec, ) @@ -1159,6 +1249,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let input = into_physical_plan( &unnest.input, registry, + config_options, runtime, extension_codec, )?; @@ -2140,6 +2231,7 @@ pub trait AsExecutionPlan: Debug + Send + Sync + Clone { fn try_into_physical_plan( &self, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result>; @@ -2230,11 +2322,12 @@ impl PhysicalExtensionCodec for DefaultPhysicalExtensionCodec { fn into_physical_plan( node: &Option>, registry: &dyn FunctionRegistry, + config_options: &ConfigOptions, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result, DataFusionError> { if let Some(field) = node { - field.try_into_physical_plan(registry, runtime, extension_codec) + field.try_into_physical_plan(registry, config_options, runtime, extension_codec) } else { Err(proto_error("Missing required field in protobuf")) } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 9cc7514a0d33..7a6e52348463 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -41,7 +41,9 @@ use datafusion::datasource::file_format::arrow::ArrowFormatFactory; use datafusion::datasource::file_format::csv::CsvFormatFactory; use datafusion::datasource::file_format::parquet::ParquetFormatFactory; use datafusion::datasource::file_format::{format_as_file_type, DefaultFileType}; -use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::execution::session_state::{ + SessionStateBuilder, SessionStateOptimizerConfig, +}; use datafusion::execution::FunctionRegistry; use datafusion::functions_aggregate::count::count_udaf; use datafusion::functions_aggregate::expr_fn::{ @@ -2544,7 
+2546,10 @@ async fn roundtrip_union_query() -> Result<()> { // proto deserialization only supports 2-way union, hence this plan has nested unions // apply the flatten unions optimizer rule to be able to compare let optimizer = Optimizer::with_rules(vec![Arc::new(EliminateNestedUnion::new())]); - let unnested = optimizer.optimize(logical_round_trip, &(ctx.state()), |_x, _y| {})?; + let session_state = ctx.state(); + let session_optimizer_config = SessionStateOptimizerConfig::new(&session_state); + let unnested = + optimizer.optimize(logical_round_trip, &session_optimizer_config, |_x, _y| {})?; assert_eq!( format!("{}", plan.display_indent_schema()), format!("{}", unnested.display_indent_schema()), diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index b5bfef99a6f3..b0151d5e2d61 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -88,7 +88,7 @@ use datafusion::physical_plan::{ }; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion::scalar::ScalarValue; -use datafusion_common::config::TableParquetOptions; +use datafusion_common::config::{ConfigOptions, TableParquetOptions}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; @@ -135,7 +135,12 @@ fn roundtrip_test_and_return( .expect("to proto"); let runtime = ctx.runtime_env(); let result_exec_plan: Arc = proto - .try_into_physical_plan(ctx, runtime.deref(), codec) + .try_into_physical_plan( + ctx, + ConfigOptions::default_singleton(), + runtime.deref(), + codec, + ) .expect("from proto"); assert_eq!(format!("{exec_plan:?}"), format!("{result_exec_plan:?}")); Ok(result_exec_plan) @@ -951,6 +956,7 @@ fn roundtrip_scalar_udf() -> Result<()> { fun_def, vec![col("a", &schema)?], DataType::Int64, + 
Arc::clone(ConfigOptions::default_singleton_arc()), ); let project = @@ -1079,6 +1085,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], DataType::Int64, + Arc::clone(ConfigOptions::default_singleton_arc()), )); let filter = Arc::new(FilterExec::try_new( @@ -1181,6 +1188,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], DataType::Int64, + Arc::clone(ConfigOptions::default_singleton_arc()), )); let udaf = Arc::new(AggregateUDF::from(MyAggregateUDF::new( @@ -1557,6 +1565,7 @@ async fn roundtrip_coalesce() -> Result<()> { .map_err(|e| DataFusionError::External(Box::new(e)))?; let restored = node.try_into_physical_plan( &ctx, + ctx.state().config_options(), ctx.runtime_env().as_ref(), &DefaultPhysicalExtensionCodec {}, )?; diff --git a/datafusion/wasmtest/src/lib.rs b/datafusion/wasmtest/src/lib.rs index e2ba50beb657..587d85f560ca 100644 --- a/datafusion/wasmtest/src/lib.rs +++ b/datafusion/wasmtest/src/lib.rs @@ -23,6 +23,7 @@ extern crate wasm_bindgen; +use datafusion_common::config::ConfigOptions; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::lit; @@ -64,8 +65,10 @@ pub fn basic_exprs() { // Simplify Expr (using datafusion-phys-expr and datafusion-optimizer) let schema = Arc::new(DFSchema::empty()); let execution_props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&execution_props).with_schema(schema)); + let config_options = ConfigOptions::default_singleton_arc(); + let simplifier = ExprSimplifier::new( + SimplifyContext::new(&execution_props, config_options).with_schema(schema), + ); let simplified_expr = simplifier.simplify(expr).unwrap(); log(&format!("Simplified Expr: {simplified_expr:?}")); } From 250653735d98e62887cba4a52fac256d1c162c56 
Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Thu, 13 Mar 2025 18:00:46 +0000 Subject: [PATCH 2/5] Fixed doc. --- .../src/simplify_expressions/expr_simplifier.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 457252e95306..c2f9a06664a1 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -159,13 +159,12 @@ impl ExprSimplifier { /// /// /// Simple implementation that provides `Simplifier` the information it needs /// /// See SimplifyContext for a structure that does this. - /// #[derive(Default)] /// struct Info<'a> { /// execution_props: ExecutionProps, /// config_options: &'a Arc, /// } /// - /// impl SimplifyInfo for Info { + /// impl SimplifyInfo for Info<'_> { /// fn is_boolean_type(&self, expr: &Expr) -> Result { /// Ok(false) /// } @@ -184,7 +183,12 @@ impl ExprSimplifier { /// } /// /// // Create the simplifier - /// let simplifier = ExprSimplifier::new(Info::default()); + /// let config_options = Arc::new(ConfigOptions::new()); + /// let info = Info { + /// execution_props: ExecutionProps::new(), + /// config_options: &config_options, + /// }; + /// let simplifier = ExprSimplifier::new(info); /// /// // b < 2 /// let b_lt_2 = col("b").gt(lit(2)); From 4f50b017b4b94f7b1566ec90ad839e684ae21733 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Wed, 26 Mar 2025 17:11:58 +0000 Subject: [PATCH 3/5] Updates from merge operation. 
--- datafusion/functions-nested/src/array_has.rs | 16 +++++++++++++--- datafusion/physical-expr/src/physical_expr.rs | 19 +++++++++++++++++-- datafusion/proto/src/physical_plan/mod.rs | 1 + 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 48ee341566b9..fe003d9a08f0 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -595,6 +595,7 @@ fn general_array_has_all_and_any_kernel( #[cfg(test)] mod tests { use arrow::array::create_array; + use datafusion_common::config::ConfigOptions; use datafusion_common::utils::SingleRowListArrayBuilder; use datafusion_expr::{ col, execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, Expr, @@ -615,7 +616,10 @@ mod tests { let needle = col("c"); let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::new( + &props, + ConfigOptions::default_singleton_arc(), + ); let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) @@ -639,7 +643,10 @@ mod tests { let needle = col("c"); let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::new( + &props, + ConfigOptions::default_singleton_arc(), + ); let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) @@ -663,7 +670,10 @@ mod tests { let needle = col("c2"); let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::new( + &props, + ConfigOptions::default_singleton_arc(), + ); let Ok(ExprSimplifyResult::Original(args)) = 
ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index 63c4ccbb4b38..a8a7cf7e320e 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -63,6 +63,7 @@ pub fn physical_exprs_bag_equal( use crate::{expressions, LexOrdering, PhysicalSortExpr}; use arrow::compute::SortOptions; use arrow::datatypes::Schema; +use datafusion_common::config::ConfigOptions; use datafusion_common::plan_err; use datafusion_common::Result; use datafusion_expr::{Expr, SortExpr}; @@ -153,6 +154,7 @@ pub fn create_physical_sort_expr( e: &SortExpr, input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { let SortExpr { expr, @@ -160,7 +162,12 @@ pub fn create_physical_sort_expr( nulls_first, } = e; Ok(PhysicalSortExpr { - expr: create_physical_expr(expr, input_dfschema, execution_props)?, + expr: create_physical_expr( + expr, + input_dfschema, + execution_props, + config_options, + )?, options: SortOptions { descending: !asc, nulls_first: *nulls_first, @@ -173,10 +180,18 @@ pub fn create_physical_sort_exprs( exprs: &[SortExpr], input_dfschema: &DFSchema, execution_props: &ExecutionProps, + config_options: &Arc, ) -> Result { exprs .iter() - .map(|expr| create_physical_sort_expr(expr, input_dfschema, execution_props)) + .map(|expr| { + create_physical_sort_expr( + expr, + input_dfschema, + execution_props, + config_options, + ) + }) .collect::>() } diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 5aed93e645c6..a14326ef64e2 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -256,6 +256,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { let scan_conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), registry, + config_options, 
extension_codec, Arc::new(JsonSource::new()), )?; From aad3e0d460b11b245239359937ac26cf2830b32b Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 1 Apr 2025 14:17:26 +0000 Subject: [PATCH 4/5] Updates from main. --- datafusion/catalog/src/memory/table.rs | 2 ++ datafusion/ffi/src/udf/mod.rs | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/catalog/src/memory/table.rs b/datafusion/catalog/src/memory/table.rs index 81243e2c4889..a2139bda37e2 100644 --- a/datafusion/catalog/src/memory/table.rs +++ b/datafusion/catalog/src/memory/table.rs @@ -238,6 +238,7 @@ impl TableProvider for MemTable { let sort_order = self.sort_order.lock(); if !sort_order.is_empty() { let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?; + let config_options = Arc::new(state.config_options().clone()); let file_sort_order = sort_order .iter() @@ -246,6 +247,7 @@ impl TableProvider for MemTable { sort_exprs, &df_schema, state.execution_props(), + &config_options, ) }) .collect::>>()?; diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs index 97e3a4c5140e..b2f3df4dc62f 100644 --- a/datafusion/ffi/src/udf/mod.rs +++ b/datafusion/ffi/src/udf/mod.rs @@ -28,7 +28,8 @@ use arrow::{ ffi::{from_ffi, to_ffi, FFI_ArrowSchema}, }; use datafusion::{ - config::ConfigOptions, error::DataFusionError, + config::ConfigOptions, + error::DataFusionError, logical_expr::{ type_coercion::functions::data_types_with_scalar_udf, ReturnInfo, ReturnTypeArgs, }, From 61e4ede3cfbed801bca712dd184b2e98118cdd28 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 8 Apr 2025 13:55:19 +0000 Subject: [PATCH 5/5] Updates from merge operation. 
--- datafusion/catalog/src/memory/table.rs | 3 ++- .../core/src/datasource/listing/table.rs | 2 +- datafusion/core/src/physical_planner.rs | 8 ++++++-- datafusion/datasource/src/file_scan_config.rs | 6 +++++- datafusion/ffi/src/udf/mod.rs | 3 ++- .../optimizer/src/scalar_subquery_to_join.rs | 2 +- datafusion/optimizer/src/utils.rs | 19 +++++++++++++++---- 7 files changed, 32 insertions(+), 11 deletions(-) diff --git a/datafusion/catalog/src/memory/table.rs b/datafusion/catalog/src/memory/table.rs index 81243e2c4889..9f7a608034dd 100644 --- a/datafusion/catalog/src/memory/table.rs +++ b/datafusion/catalog/src/memory/table.rs @@ -238,7 +238,7 @@ impl TableProvider for MemTable { let sort_order = self.sort_order.lock(); if !sort_order.is_empty() { let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?; - + let config_options = Arc::new(state.config_options().clone()); let file_sort_order = sort_order .iter() .map(|sort_exprs| { @@ -246,6 +246,7 @@ impl TableProvider for MemTable { sort_exprs, &df_schema, state.execution_props(), + &config_options, ) }) .collect::>>()?; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index f42fe3b117a6..46dc636484ae 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -891,7 +891,7 @@ impl TableProvider for ListingTable { } let output_ordering = self.try_create_output_ordering()?; - let config_options = Arc::new(session_state.config_options().clone()); + let config_options = Arc::new(state.config_options().clone()); match state .config_options() diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index fd5c5ae45788..811d59bca2f4 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2155,8 +2155,12 @@ impl DefaultPhysicalPlanner { physical_name(e) }; - let physical_expr = - self.create_physical_expr(e, 
input_logical_schema, session_state); + let physical_expr = self.create_physical_expr( + e, + input_logical_schema, + session_state, + config_options, + ); // Check for possible column name mismatches let final_physical_expr = diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index 426652ae304e..9a73bbe333d7 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -2351,12 +2351,16 @@ mod tests { // Setup sort expression let exec_props = ExecutionProps::new(); + let config_options = ConfigOptions::default_singleton_arc(); let df_schema = DFSchema::try_from_qualified_schema("test", schema.as_ref())?; let sort_expr = vec![col("value").sort(true, false)]; let physical_sort_exprs: Vec<_> = sort_expr .iter() - .map(|expr| create_physical_sort_expr(expr, &df_schema, &exec_props).unwrap()) + .map(|expr| { + create_physical_sort_expr(expr, &df_schema, &exec_props, config_options) + .unwrap() + }) .collect(); let sort_ordering = LexOrdering::from(physical_sort_exprs); diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs index 97e3a4c5140e..b2f3df4dc62f 100644 --- a/datafusion/ffi/src/udf/mod.rs +++ b/datafusion/ffi/src/udf/mod.rs @@ -28,7 +28,8 @@ use arrow::{ ffi::{from_ffi, to_ffi, FFI_ArrowSchema}, }; use datafusion::{ - config::ConfigOptions, error::DataFusionError, + config::ConfigOptions, + error::DataFusionError, logical_expr::{ type_coercion::functions::data_types_with_scalar_udf, ReturnInfo, ReturnTypeArgs, }, diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 23db3c548348..e6a6c69e6a1f 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -352,7 +352,7 @@ fn build_join( let mut computation_project_expr = HashMap::new(); if let Some(expr_map) = collected_count_expr_map { for (name, 
result) in expr_map { - if evaluates_to_null(result.clone(), result.column_refs())? { + if evaluates_to_null(result.clone(), result.column_refs(), config_options)? { // If expr always returns null when column is null, skip processing continue; } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 48dca79af43f..8f2e006d774c 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -84,7 +84,11 @@ pub fn is_restrict_null_predicate<'a>( // If result is single `true`, return false; // If result is single `NULL` or `false`, return true; Ok( - match evaluate_expr_with_null_column(predicate, join_cols_of_predicate)? { + match evaluate_expr_with_null_column( + predicate, + join_cols_of_predicate, + config_options, + )? { ColumnarValue::Array(array) => { if array.len() == 1 { let boolean_array = as_boolean_array(&array)?; @@ -108,13 +112,14 @@ pub fn is_restrict_null_predicate<'a>( pub fn evaluates_to_null<'a>( predicate: Expr, null_columns: impl IntoIterator, + config_options: &Arc, ) -> Result { if matches!(predicate, Expr::Column(_)) { return Ok(true); } Ok( - match evaluate_expr_with_null_column(predicate, null_columns)? { + match evaluate_expr_with_null_column(predicate, null_columns, config_options)? { ColumnarValue::Array(_) => false, ColumnarValue::Scalar(scalar) => scalar.is_null(), }, @@ -124,6 +129,7 @@ pub fn evaluates_to_null<'a>( fn evaluate_expr_with_null_column<'a>( predicate: Expr, null_columns: impl IntoIterator, + config_options: &Arc, ) -> Result { static DUMMY_COL_NAME: &str = "?"; let schema = Schema::new(vec![Field::new(DUMMY_COL_NAME, DataType::Null, true)]); @@ -140,8 +146,13 @@ fn evaluate_expr_with_null_column<'a>( let replaced_predicate = replace_col(predicate, &join_cols_to_replace)?; let coerced_predicate = coerce(replaced_predicate, &input_schema)?; - create_physical_expr(&coerced_predicate, &input_schema, &execution_props, config_options)? 
- .evaluate(&input_batch) + create_physical_expr( + &coerced_predicate, + &input_schema, + &execution_props, + config_options, + )? + .evaluate(&input_batch) } fn coerce(expr: Expr, schema: &DFSchema) -> Result {