Skip to content

feat: Add ConfigOptions to ScalarFunctionArgs #13527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion datafusion-examples/examples/composed_extension_codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ async fn main() {

// deserialize proto back to execution plan
let runtime = ctx.runtime_env();
let state = ctx.state();
let config_options = state.config_options();
let result_exec_plan: Arc<dyn ExecutionPlan> = proto
.try_into_physical_plan(&ctx, runtime.deref(), &composed_codec)
.try_into_physical_plan(&ctx, config_options, runtime.deref(), &composed_codec)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an API change, but I think it is required in order to thread the config options through, since each argument is currently passed explicitly.

We could potentially improve the try_into_physical_plan API (as a follow on PR) to make it easier to update the API in the future using a trait or something like

https://github.com/apache/datafusion/blob/e99e02b9b9093ceb0c13a2dd32a2a89beba47930/datafusion/expr/src/expr_schema.rs#L38-L39

So this would look something like

/// Bundles everything needed to deserialize a protobuf plan into an
/// [`ExecutionPlan`], so `try_into_physical_plan` can take one context
/// argument instead of a growing list of separate parameters.
pub trait ProtobufContext { 
  /// return a function registry
  fn function_registry(&self) -> &dyn FunctionRegistry;
  /// return the runtime env
  fn runtime_env(&self) -> &RuntimeEnv;
  /// return the config options
  fn config_options(&self) -> &ConfigOptions;
  /// return extension codec
  fn extension_codec(&self) -> &dyn PhysicalExtensionCodec;
}
impl AsExecutionPlan for protobuf::PhysicalPlanNode {
...
    fn try_into_physical_plan(
        &self,
        registry: &dyn FunctionRegistry,
        config_options: &ConfigOptions,
        runtime: &RuntimeEnv,
        extension_codec: &dyn PhysicalExtensionCodec,
    ) -> Result<Arc<dyn ExecutionPlan>> {

.expect("from proto");

// assert that the original and deserialized execution plans are equal
Expand Down
39 changes: 18 additions & 21 deletions datafusion-examples/examples/expr_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use datafusion::common::stats::Precision;
use datafusion::common::tree_node::{Transformed, TreeNode};
use datafusion::common::{ColumnStatistics, DFSchema};
use datafusion::common::{ScalarValue, ToDFSchema};
use datafusion::config::ConfigOptions;
use datafusion::error::Result;
use datafusion::functions_aggregate::first_last::first_value_udaf;
use datafusion::logical_expr::execution_props::ExecutionProps;
Expand All @@ -35,7 +36,9 @@ use datafusion::logical_expr::simplify::SimplifyContext;
use datafusion::logical_expr::{ColumnarValue, ExprFunctionExt, ExprSchemable, Operator};
use datafusion::optimizer::analyzer::type_coercion::TypeCoercionRewriter;
use datafusion::optimizer::simplify_expressions::ExprSimplifier;
use datafusion::physical_expr::{analyze, AnalysisContext, ExprBoundaries};
use datafusion::physical_expr::{
analyze, create_physical_expr, AnalysisContext, ExprBoundaries,
};
use datafusion::prelude::*;

/// This example demonstrates the DataFusion [`Expr`] API.
Expand Down Expand Up @@ -176,7 +179,8 @@ fn simplify_demo() -> Result<()> {
// expressions, such as the current time (to evaluate `now()`
// correctly)
let props = ExecutionProps::new();
let context = SimplifyContext::new(&props).with_schema(schema);
let config_options = ConfigOptions::default_singleton_arc();
let context = SimplifyContext::new(&props, config_options).with_schema(schema);
let simplifier = ExprSimplifier::new(context);

// And then call the simplify_expr function:
Expand All @@ -191,7 +195,8 @@ fn simplify_demo() -> Result<()> {

// here are some other examples of what DataFusion is capable of
let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?;
let context = SimplifyContext::new(&props).with_schema(schema.clone());
let context =
SimplifyContext::new(&props, config_options).with_schema(schema.clone());
let simplifier = ExprSimplifier::new(context);

// basic arithmetic simplification
Expand Down Expand Up @@ -529,8 +534,8 @@ fn type_coercion_demo() -> Result<()> {

// Evaluation with an expression that has not been type coerced cannot succeed.
let props = ExecutionProps::default();
let physical_expr =
datafusion::physical_expr::create_physical_expr(&expr, &df_schema, &props)?;
let config_options = ConfigOptions::default_singleton_arc();
let physical_expr = create_physical_expr(&expr, &df_schema, &props, config_options)?;
let e = physical_expr.evaluate(&batch).unwrap_err();
assert!(e
.find_root()
Expand All @@ -543,26 +548,21 @@ fn type_coercion_demo() -> Result<()> {
assert!(physical_expr.evaluate(&batch).is_ok());

// 2. Type coercion with `ExprSimplifier::coerce`.
let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone()));
let context = SimplifyContext::new(&props, config_options)
.with_schema(Arc::new(df_schema.clone()));
let simplifier = ExprSimplifier::new(context);
let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?;
let physical_expr = datafusion::physical_expr::create_physical_expr(
&coerced_expr,
&df_schema,
&props,
)?;
let physical_expr =
create_physical_expr(&coerced_expr, &df_schema, &props, config_options)?;
assert!(physical_expr.evaluate(&batch).is_ok());

// 3. Type coercion with `TypeCoercionRewriter`.
let coerced_expr = expr
.clone()
.rewrite(&mut TypeCoercionRewriter::new(&df_schema))?
.data;
let physical_expr = datafusion::physical_expr::create_physical_expr(
&coerced_expr,
&df_schema,
&props,
)?;
let physical_expr =
create_physical_expr(&coerced_expr, &df_schema, &props, config_options)?;
assert!(physical_expr.evaluate(&batch).is_ok());

// 4. Apply explicit type coercion by manually rewriting the expression
Expand All @@ -586,11 +586,8 @@ fn type_coercion_demo() -> Result<()> {
}
})?
.data;
let physical_expr = datafusion::physical_expr::create_physical_expr(
&coerced_expr,
&df_schema,
&props,
)?;
let physical_expr =
create_physical_expr(&coerced_expr, &df_schema, &props, config_options)?;
assert!(physical_expr.evaluate(&batch).is_ok());

Ok(())
Expand Down
18 changes: 10 additions & 8 deletions datafusion-examples/examples/planner_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use datafusion::error::Result;
use datafusion::execution::session_state::SessionStateOptimizerConfig;
use datafusion::logical_expr::LogicalPlan;
use datafusion::physical_plan::displayable;
use datafusion::physical_planner::DefaultPhysicalPlanner;
Expand Down Expand Up @@ -91,17 +92,19 @@ async fn to_physical_plan_step_by_step_demo(
ctx: &SessionContext,
) -> Result<()> {
// First analyze the logical plan
let analyzed_logical_plan = ctx.state().analyzer().execute_and_check(
let session_state = ctx.state();
let analyzed_logical_plan = session_state.analyzer().execute_and_check(
input,
ctx.state().config_options(),
session_state.config_options(),
|_, _| (),
)?;
println!("Analyzed logical plan:\n\n{:?}\n\n", analyzed_logical_plan);

// Optimize the analyzed logical plan
let optimized_logical_plan = ctx.state().optimizer().optimize(
let session_optimizer_config = SessionStateOptimizerConfig::new(&session_state);
let optimized_logical_plan = session_state.optimizer().optimize(
analyzed_logical_plan,
&ctx.state(),
&session_optimizer_config,
|_, _| (),
)?;
println!(
Expand All @@ -110,10 +113,9 @@ async fn to_physical_plan_step_by_step_demo(
);

// Create the physical plan
let physical_plan = ctx
.state()
let physical_plan = session_state
.query_planner()
.create_physical_plan(&optimized_logical_plan, &ctx.state())
.create_physical_plan(&optimized_logical_plan, &session_state)
.await?;
println!(
"Final physical plan:\n\n{}\n\n",
Expand All @@ -127,7 +129,7 @@ async fn to_physical_plan_step_by_step_demo(
// on DefaultPhysicalPlanner. Not all planners will provide this feature.
let planner = DefaultPhysicalPlanner::default();
let physical_plan =
planner.optimize_physical_plan(physical_plan, &ctx.state(), |_, _| {})?;
planner.optimize_physical_plan(physical_plan, &session_state, |_, _| {})?;
println!(
"Optimized physical plan:\n\n{}\n\n",
displayable(physical_plan.as_ref()).indent(false)
Expand Down
5 changes: 4 additions & 1 deletion datafusion-examples/examples/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::sync::Arc;

use arrow::array::{ArrayRef, BooleanArray, Int32Array};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::common::config::ConfigOptions;
use datafusion::common::{DFSchema, ScalarValue};
use datafusion::execution::context::ExecutionProps;
use datafusion::physical_expr::create_physical_expr;
Expand Down Expand Up @@ -188,7 +189,9 @@ impl PruningStatistics for MyCatalog {
fn create_pruning_predicate(expr: Expr, schema: &SchemaRef) -> PruningPredicate {
let df_schema = DFSchema::try_from(schema.as_ref().clone()).unwrap();
let props = ExecutionProps::new();
let physical_expr = create_physical_expr(&expr, &df_schema, &props).unwrap();
let config_options = ConfigOptions::default_singleton_arc();
let physical_expr =
create_physical_expr(&expr, &df_schema, &props, config_options).unwrap();
PruningPredicate::try_new(physical_expr, schema.clone()).unwrap()
}

Expand Down
4 changes: 3 additions & 1 deletion datafusion-examples/examples/simple_udtf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use datafusion::arrow::record_batch::RecordBatch;
use datafusion::catalog::Session;
use datafusion::catalog::TableFunctionImpl;
use datafusion::common::{plan_err, ScalarValue};
use datafusion::config::ConfigOptions;
use datafusion::datasource::memory::MemorySourceConfig;
use datafusion::datasource::TableProvider;
use datafusion::error::Result;
Expand Down Expand Up @@ -142,7 +143,8 @@ impl TableFunctionImpl for LocalCsvTableFunc {
.map(|expr| {
// try to simplify the expression, so 1+2 becomes 3, for example
let execution_props = ExecutionProps::new();
let info = SimplifyContext::new(&execution_props);
let config_options = ConfigOptions::default_singleton_arc();
let info = SimplifyContext::new(&execution_props, config_options);
let expr = ExprSimplifier::new(info).simplify(expr.clone())?;

if let Expr::Literal(ScalarValue::Int64(Some(limit))) = expr {
Expand Down
34 changes: 27 additions & 7 deletions datafusion/catalog-listing/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ use arrow::{
datatypes::{DataType, Field, Fields, Schema},
record_batch::RecordBatch,
};
use datafusion_expr::execution_props::ExecutionProps;

use futures::stream::FuturesUnordered;
use futures::{stream::BoxStream, StreamExt, TryStreamExt};
use log::{debug, trace};

use datafusion_common::config::ConfigOptions;
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
use datafusion_common::{Column, DFSchema, DataFusionError};
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::{Expr, Volatility};
use datafusion_physical_expr::create_physical_expr;
use object_store::path::Path;
Expand Down Expand Up @@ -243,6 +245,7 @@ async fn prune_partitions(
partitions: Vec<Partition>,
filters: &[Expr],
partition_cols: &[(String, DataType)],
config_options: &Arc<ConfigOptions>,
) -> Result<Vec<Partition>> {
if filters.is_empty() {
return Ok(partitions);
Expand Down Expand Up @@ -294,7 +297,7 @@ async fn prune_partitions(

// Applies `filter` to `batch` returning `None` on error
let do_filter = |filter| -> Result<ArrayRef> {
let expr = create_physical_expr(filter, &df_schema, &props)?;
let expr = create_physical_expr(filter, &df_schema, &props, config_options)?;
expr.evaluate(&batch)?.into_array(partitions.len())
};

Expand Down Expand Up @@ -413,6 +416,7 @@ pub async fn pruned_partition_list<'a>(
filters: &'a [Expr],
file_extension: &'a str,
partition_cols: &'a [(String, DataType)],
config_options: &Arc<ConfigOptions>,
) -> Result<BoxStream<'a, Result<PartitionedFile>>> {
// if no partition col => simply list all the files
if partition_cols.is_empty() {
Expand All @@ -437,8 +441,14 @@ pub async fn pruned_partition_list<'a>(
.await?;
debug!("Listed {} partitions", partitions.len());

let pruned =
prune_partitions(table_path, partitions, filters, partition_cols).await?;
let pruned = prune_partitions(
table_path,
partitions,
filters,
partition_cols,
config_options,
)
.await?;

debug!("Pruning yielded {} partitions", pruned.len());

Expand Down Expand Up @@ -607,6 +617,7 @@ mod tests {
&[filter],
".parquet",
&[(String::from("mypartition"), DataType::Utf8)],
&Arc::clone(ConfigOptions::default_singleton_arc()),
)
.await
.expect("partition pruning failed")
Expand All @@ -632,6 +643,7 @@ mod tests {
&[filter],
".parquet",
&[(String::from("mypartition"), DataType::Utf8)],
&Arc::clone(ConfigOptions::default_singleton_arc()),
)
.await
.expect("partition pruning failed")
Expand Down Expand Up @@ -675,6 +687,7 @@ mod tests {
(String::from("part1"), DataType::Utf8),
(String::from("part2"), DataType::Utf8),
],
&Arc::clone(ConfigOptions::default_singleton_arc()),
)
.await
.expect("partition pruning failed")
Expand Down Expand Up @@ -1018,10 +1031,17 @@ mod tests {
.unwrap();
}

(Arc::new(memory), Arc::new(MockSession {}))
(
Arc::new(memory),
Arc::new(MockSession {
config: SessionConfig::new(),
}),
)
}

struct MockSession {}
struct MockSession {
config: SessionConfig,
}

#[async_trait]
impl Session for MockSession {
Expand All @@ -1030,7 +1050,7 @@ mod tests {
}

fn config(&self) -> &SessionConfig {
unimplemented!()
&self.config
}

async fn create_physical_plan(
Expand Down
2 changes: 2 additions & 0 deletions datafusion/catalog/src/memory/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ impl TableProvider for MemTable {
let sort_order = self.sort_order.lock();
if !sort_order.is_empty() {
let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?;
let config_options = Arc::new(state.config_options().clone());

let file_sort_order = sort_order
.iter()
Expand All @@ -246,6 +247,7 @@ impl TableProvider for MemTable {
sort_exprs,
&df_schema,
state.execution_props(),
&config_options,
)
})
.collect::<Result<Vec<_>>>()?;
Expand Down
16 changes: 15 additions & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::collections::{BTreeMap, HashMap};
use std::error::Error;
use std::fmt::{self, Display};
use std::str::FromStr;
use std::sync::{Arc, LazyLock};

use crate::error::_config_err;
use crate::parsers::CompressionTypeVariant;
Expand Down Expand Up @@ -724,7 +725,7 @@ config_namespace! {
}

/// A key value pair, with a corresponding description
#[derive(Debug)]
#[derive(Debug, Hash, PartialEq, Eq)]
pub struct ConfigEntry {
/// A unique string to identify this config value
pub key: String,
Expand Down Expand Up @@ -777,7 +778,20 @@ impl ConfigField for ConfigOptions {
}
}

static CONFIG_OPTIONS_SINGLETON: LazyLock<Arc<ConfigOptions>> =
LazyLock::new(|| Arc::new(ConfigOptions::default()));

impl ConfigOptions {
/// this is a static singleton to be used for testing only where the default values are sufficient
pub fn default_singleton() -> &'static ConfigOptions {
CONFIG_OPTIONS_SINGLETON.as_ref()
}

/// this is a static singleton to be used for testing only where the default values are sufficient
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯

pub fn default_singleton_arc() -> &'static Arc<ConfigOptions> {
&CONFIG_OPTIONS_SINGLETON
}

/// Creates a new [`ConfigOptions`] with default values
pub fn new() -> Self {
Self::default()
Expand Down
Loading