Skip to content

chore: remove deprecated variants of UDF's invoke (invoke, invoke_no_args, invoke_batch) #15123

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 17, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -29,7 +29,9 @@ use datafusion_common::Result;
use datafusion_common::{JoinSide, JoinType, ScalarValue};
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
use datafusion_expr::{Operator, ScalarUDF, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::{
Operator, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_physical_expr::expressions::{
binary, cast, col, BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr,
};
@@ -57,6 +59,7 @@ use datafusion_physical_plan::streaming::StreamingTableExec;
use datafusion_physical_plan::union::UnionExec;
use datafusion_physical_plan::{get_plan_string, ExecutionPlan};

use datafusion_expr_common::columnar_value::ColumnarValue;
use itertools::Itertools;

/// Mocked UDF
@@ -89,6 +92,10 @@ impl ScalarUDFImpl for DummyUDF {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Int32)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
Copy link
Member

@Weijun-H Weijun-H Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about using not_impl_err instead of painc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked other traits, it would prefer unimplemented!().

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW this is a test

}
}

#[test]
Original file line number Diff line number Diff line change
@@ -679,6 +679,10 @@ impl ScalarUDFImpl for CastToI64UDF {
Ok(DataType::Int64)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}

// Demonstrate simplifying a UDF
fn simplify(
&self,
@@ -946,6 +950,10 @@ impl ScalarUDFImpl for ScalarFunctionWrapper {
Ok(self.return_type.clone())
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}

fn simplify(
&self,
args: Vec<Expr>,
98 changes: 2 additions & 96 deletions datafusion/expr/src/udf.rs
Original file line number Diff line number Diff line change
@@ -20,9 +20,7 @@
use crate::expr::schema_name_from_exprs_comma_separated_without_space;
use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
use crate::sort_properties::{ExprProperties, SortProperties};
use crate::{
ColumnarValue, Documentation, Expr, ScalarFunctionImplementation, Signature,
};
use crate::{ColumnarValue, Documentation, Expr, Signature};
use arrow::datatypes::DataType;
use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue};
use datafusion_expr_common::interval_arithmetic::Interval;
@@ -198,53 +196,18 @@ impl ScalarUDF {
self.inner.simplify(args, info)
}

#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
pub fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke(args)
}

Comment on lines -201 to -206
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

#[allow(deprecated)]
pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
self.inner.is_nullable(args, schema)
}

#[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")]
pub fn invoke_batch(
&self,
args: &[ColumnarValue],
number_rows: usize,
) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke_batch(args, number_rows)
}

/// Invoke the function on `args`, returning the appropriate result.
///
/// See [`ScalarUDFImpl::invoke_with_args`] for details.
pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
self.inner.invoke_with_args(args)
}

/// Invoke the function without `args` but number of rows, returning the appropriate result.
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
pub fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.inner.invoke_no_args(number_rows)
}

/// Returns a `ScalarFunctionImplementation` that can invoke the function
/// during execution
#[deprecated(since = "42.0.0", note = "Use `invoke_with_args` instead")]
pub fn fun(&self) -> ScalarFunctionImplementation {
let captured = Arc::clone(&self.inner);
#[allow(deprecated)]
Arc::new(move |args| captured.invoke(args))
}

/// Get the circuits of inner implementation
pub fn short_circuits(&self) -> bool {
self.inner.short_circuits()
@@ -568,47 +531,6 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
true
}

/// Invoke the function on `args`, returning the appropriate result
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
not_impl_err!(
"Function {} does not implement invoke but called",
self.name()
)
}

/// Invoke the function with `args` and the number of rows,
/// returning the appropriate result.
///
/// Note: See notes on [`Self::invoke_with_args`]
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
///
/// See <https://github.com/apache/datafusion/issues/13515> for more details.
#[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")]
fn invoke_batch(
&self,
args: &[ColumnarValue],
number_rows: usize,
) -> Result<ColumnarValue> {
match args.is_empty() {
true =>
{
#[allow(deprecated)]
self.invoke_no_args(number_rows)
}
false =>
{
#[allow(deprecated)]
self.invoke(args)
}
}
}

/// Invoke the function returning the appropriate result.
///
/// # Performance
@@ -619,23 +541,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
///
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
/// to arrays, which will likely be simpler code, but be slower.
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
#[allow(deprecated)]
self.invoke_batch(&args.args, args.number_rows)
}

/// Invoke the function without `args`, instead the number of rows are provided,
/// returning the appropriate result.
///
/// Note: This method is deprecated and will be removed in future releases.
/// User defined functions should implement [`Self::invoke_with_args`] instead.
#[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")]
fn invoke_no_args(&self, _number_rows: usize) -> Result<ColumnarValue> {
not_impl_err!(
"Function {} does not implement invoke_no_args but called",
self.name()
)
}
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue>;

/// Returns any aliases (alternate names) for this function.
///
11 changes: 6 additions & 5 deletions datafusion/functions-nested/src/max.rs
Original file line number Diff line number Diff line change
@@ -24,7 +24,9 @@ use datafusion_common::cast::as_list_array;
use datafusion_common::utils::take_function_args;
use datafusion_common::{exec_err, ScalarValue};
use datafusion_doc::Documentation;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_expr::{
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions_aggregate::min_max;
use datafusion_macros::user_doc;
use itertools::Itertools;
@@ -96,12 +98,11 @@ impl ScalarUDFImpl for ArrayMax {
}
}

fn invoke_batch(
fn invoke_with_args(
&self,
args: &[ColumnarValue],
_number_rows: usize,
args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
make_scalar_function(array_max_inner)(args)
make_scalar_function(array_max_inner)(&args.args)
}

fn aliases(&self) -> &[String] {
10 changes: 7 additions & 3 deletions datafusion/optimizer/src/common_subexpr_eliminate.rs
Original file line number Diff line number Diff line change
@@ -797,8 +797,8 @@ mod test {
use datafusion_expr::logical_plan::{table_scan, JoinType};
use datafusion_expr::{
grouping_set, is_null, not, AccumulatorFactoryFunction, AggregateUDF,
ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, SimpleAggregateUDF,
Volatility,
ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
SimpleAggregateUDF, Volatility,
};
use datafusion_expr::{lit, logical_plan::builder::LogicalPlanBuilder};

@@ -1598,7 +1598,7 @@ mod test {
Ok(DataType::Int32)
}

fn invoke(&self, _: &[ColumnarValue]) -> Result<ColumnarValue> {
fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("not implemented")
}
}
@@ -1705,5 +1705,9 @@ mod test {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Float64)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}
}
Original file line number Diff line number Diff line change
@@ -4307,6 +4307,10 @@ mod tests {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Int16)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}

#[test]
18 changes: 13 additions & 5 deletions datafusion/proto/tests/cases/mod.rs
Original file line number Diff line number Diff line change
@@ -16,17 +16,17 @@
// under the License.

use arrow::datatypes::{DataType, Field};
use std::any::Any;
use std::fmt::Debug;

use datafusion::logical_expr::ColumnarValue;
use datafusion_common::plan_err;
use datafusion_expr::function::AccumulatorArgs;
use datafusion_expr::{
Accumulator, AggregateUDFImpl, PartitionEvaluator, ScalarUDFImpl, Signature,
Volatility, WindowUDFImpl,
Accumulator, AggregateUDFImpl, PartitionEvaluator, ScalarFunctionArgs, ScalarUDFImpl,
Signature, Volatility, WindowUDFImpl,
};
use datafusion_functions_window_common::field::WindowUDFFieldArgs;
use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
use std::any::Any;
use std::fmt::Debug;

mod roundtrip_logical_plan;
mod roundtrip_physical_plan;
@@ -69,6 +69,14 @@ impl ScalarUDFImpl for MyRegexUdf {
plan_err!("regex_udf only accepts Utf8 arguments")
}
}

fn invoke_with_args(
&self,
_args: ScalarFunctionArgs,
) -> datafusion_common::Result<ColumnarValue> {
panic!("dummy - not implemented")
}

fn aliases(&self) -> &[String] {
&self.aliases
}
8 changes: 6 additions & 2 deletions datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
@@ -1661,8 +1661,8 @@ mod tests {
use datafusion_expr::{
case, cast, col, cube, exists, grouping_set, interval_datetime_lit,
interval_year_month_lit, lit, not, not_exists, out_ref_col, placeholder, rollup,
table_scan, try_cast, when, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
WindowFrame, WindowFunctionDefinition,
table_scan, try_cast, when, ColumnarValue, ScalarFunctionArgs, ScalarUDF,
ScalarUDFImpl, Signature, Volatility, WindowFrame, WindowFunctionDefinition,
};
use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt};
use datafusion_functions::expr_fn::{get_field, named_struct};
@@ -1711,6 +1711,10 @@ mod tests {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(DataType::Int32)
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}
// See sql::tests for E2E tests.

8 changes: 6 additions & 2 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
@@ -30,8 +30,8 @@ use datafusion_expr::{
col,
logical_plan::{LogicalPlan, Prepare},
test::function_stub::sum_udaf,
CreateIndex, DdlStatement, ScalarUDF, ScalarUDFImpl, Signature, Statement,
Volatility,
ColumnarValue, CreateIndex, DdlStatement, ScalarFunctionArgs, ScalarUDF,
ScalarUDFImpl, Signature, Statement, Volatility,
};
use datafusion_functions::{string, unicode};
use datafusion_sql::{
@@ -2477,6 +2477,10 @@ impl ScalarUDFImpl for DummyUDF {
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
Ok(self.return_type.clone())
}

fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
panic!("dummy - not implemented")
}
}

/// Create logical plan, write with formatter, compare to expected output