From 0ae567fabe2ed7c432801370bb92ba9aadd9d4bb Mon Sep 17 00:00:00 2001 From: Jiashu-Hu Date: Fri, 21 Mar 2025 14:14:06 -0500 Subject: [PATCH 1/9] Added Custom Expression Planning Section --- docs/source/library-user-guide/adding-udfs.md | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index a365ef6696a3..bb45d0e25f73 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1160,6 +1160,89 @@ async fn main() -> Result<()> { // +---+ ``` +## Custom Expression Planning + +DataFusion provides native support for a limited set of SQL operators by default. For operators not natively defined, developers can extend DataFusion's functionality by implementing custom expression planning. This extensibility is a core feature of DataFusion, allowing it to be customized for particular workloads and requirements. + +### Implementing Custom Expression Planning + +To extend DataFusion with support for custom operators not natively available, you need to: + +1. Implement the `ExprPlanner` trait: This allows you to define custom logic for planning expressions that DataFusion doesn't natively recognize. The trait provides the necessary interface to translate logical expressions into physical execution plans. + + For a detailed documentation please see: [Trait ExprPlanner](https://docs.rs/datafusion/latest/datafusion/logical_expr/planner/trait.ExprPlanner.html) + +2. Register your custom planner: Integrate your implementation with DataFusion's `SessionContext` to ensure your custom planning logic is invoked during the query optimization and execution planning phase. 
+ + For a detailed documentation please see: [fn register_expr_planner](https://docs.rs/datafusion/latest/datafusion/execution/trait.FunctionRegistry.html#method.register_expr_planner) + +See example below: + +```rust +# use arrow::array::RecordBatch; +# use std::sync::Arc; + +# use datafusion::common::{assert_batches_eq, DFSchema}; +# use datafusion::error::Result; +# use datafusion::execution::FunctionRegistry; +# use datafusion::logical_expr::Operator; +# use datafusion::prelude::*; +# use datafusion::sql::sqlparser::ast::BinaryOperator; +# use datafusion_common::ScalarValue; +# use datafusion_expr::expr::Alias; +# use datafusion_expr::planner::{ExprPlanner, PlannerResult, RawBinaryExpr}; +# use datafusion_expr::BinaryExpr; + +# #[derive(Debug)] +# // Define the custom planner +# struct MyCustomPlanner; + +// Implement ExprPlanner for cutom operator logic +impl ExprPlanner for MyCustomPlanner { + fn plan_binary_op( + &self, + expr: RawBinaryExpr, + _schema: &DFSchema, + ) -> Result<PlannerResult<RawBinaryExpr>> { + match &expr.op { + // Map `->` to string concatenation + BinaryOperator::Arrow => { + // Rewrite `->` as a string concatenation operation + // - `left` and `right` are the operands (e.g., 'hello' and 'world') + // - `Operator::StringConcat` tells DataFusion to concatenate them + Ok(PlannerResult::Planned(Expr::BinaryExpr(BinaryExpr { + left: Box::new(expr.left.clone()), + right: Box::new(expr.right.clone()), + op: Operator::StringConcat, + }))) + } + _ => Ok(PlannerResult::Original(expr)), + } + } +} + +use datafusion::execution::context::SessionContext; +use datafusion::arrow::util::pretty; + +#[tokio::main] +async fn main() -> Result<()> { + let config = SessionContext::new().set_str("datafusion.sql_parser.dialect", "postgres"); + let mut ctx = SessionContext::new_with_config(config); + ctx.register_expr_planner(Arc::new(MyCustomPlanner))?; + let results = ctx.sql("select 'foo'->'bar';").await?.collect().await?; + + pretty::print_batches(&results)?; + Ok(()) +} + +// 
"+----------------------------+", +// "| Utf8(\"foo\") || Utf8(\"bar\") |", +// "+----------------------------+", +// "| foobar |", +// "+----------------------------+", + +``` + [1]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs [2]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udwf.rs [3]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udaf.rs From 3ec5cb938d10765b96619fb5c6cdb1e3d123bfe4 Mon Sep 17 00:00:00 2001 From: Jiashu-Hu Date: Fri, 21 Mar 2025 15:29:04 -0500 Subject: [PATCH 2/9] improve syntax --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index bb45d0e25f73..fbc960ee45e2 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1226,7 +1226,7 @@ use datafusion::arrow::util::pretty; #[tokio::main] async fn main() -> Result<()> { - let config = SessionContext::new().set_str("datafusion.sql_parser.dialect", "postgres"); + let config = SessionConfig::new().set_str("datafusion.sql_parser.dialect", "postgres"); let mut ctx = SessionContext::new_with_config(config); ctx.register_expr_planner(Arc::new(MyCustomPlanner))?; let results = ctx.sql("select 'foo'->'bar';").await?.collect().await?; From b70cbc868bdc5473960bef9c80dbbe71f49b3c0e Mon Sep 17 00:00:00 2001 From: Jiashu Hu <61627210+Jiashu-Hu@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:57:58 -0500 Subject: [PATCH 3/9] Update docs/source/library-user-guide/adding-udfs.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index fbc960ee45e2..4afd743b88a6 100644 --- 
a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1162,7 +1162,7 @@ async fn main() -> Result<()> { ## Custom Expression Planning -DataFusion provides native support for a limited set of SQL operators by default. For operators not natively defined, developers can extend DataFusion's functionality by implementing custom expression planning. This extensibility is a core feature of DataFusion, allowing it to be customized for particular workloads and requirements. +DataFusion provides native support for common SQL operators by default such as `+`, `-`, `||`. However it does not provide support for other operators such as `@>`. To override DataFusion's default handling or support unsupported operators, developers can extend DataFusion by implementing custom expression planning, a core feature of DataFusion ### Implementing Custom Expression Planning From 6f6716a54f254612dbd2e2dcacb18fb3654d3874 Mon Sep 17 00:00:00 2001 From: Jiashu Hu <61627210+Jiashu-Hu@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:58:06 -0500 Subject: [PATCH 4/9] Update docs/source/library-user-guide/adding-udfs.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 4afd743b88a6..26f49d7185d9 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1170,7 +1170,7 @@ To extend DataFusion with support for custom operators not natively available, y 1. Implement the `ExprPlanner` trait: This allows you to define custom logic for planning expressions that DataFusion doesn't natively recognize. The trait provides the necessary interface to translate logical expressions into physical execution plans. 
- For a detailed documentation please see: [Trait ExprPlanner](https://docs.rs/datafusion/latest/datafusion/logical_expr/planner/trait.ExprPlanner.html) + For detailed documentation please see: [Trait ExprPlanner](https://docs.rs/datafusion/latest/datafusion/logical_expr/planner/trait.ExprPlanner.html) 2. Register your custom planner: Integrate your implementation with DataFusion's `SessionContext` to ensure your custom planning logic is invoked during the query optimization and execution planning phase. From fafd7414042d3965ca5ed9ffc5bf81c7494fa6c7 Mon Sep 17 00:00:00 2001 From: Jiashu Hu <61627210+Jiashu-Hu@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:58:11 -0500 Subject: [PATCH 5/9] Update docs/source/library-user-guide/adding-udfs.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 26f49d7185d9..a9a8f1b66ba1 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1168,7 +1168,7 @@ DataFusion provides native support for common SQL operators by default such as ` To extend DataFusion with support for custom operators not natively available, you need to: -1. Implement the `ExprPlanner` trait: This allows you to define custom logic for planning expressions that DataFusion doesn't natively recognize. The trait provides the necessary interface to translate logical expressions into physical execution plans. +1. Implement the `ExprPlanner` trait: This allows you to define custom logic for planning expressions that DataFusion doesn't natively recognize. The trait provides the necessary interface to translate SQL AST nodes into logical `Expr`. 
For detailed documentation please see: [Trait ExprPlanner](https://docs.rs/datafusion/latest/datafusion/logical_expr/planner/trait.ExprPlanner.html) From c90ad1952a6bb3d38e6e31dd60f99fc248cbab10 Mon Sep 17 00:00:00 2001 From: Jiashu Hu <61627210+Jiashu-Hu@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:58:21 -0500 Subject: [PATCH 6/9] Update docs/source/library-user-guide/adding-udfs.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index a9a8f1b66ba1..7f2f64254d1b 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1174,7 +1174,7 @@ To extend DataFusion with support for custom operators not natively available, y 2. Register your custom planner: Integrate your implementation with DataFusion's `SessionContext` to ensure your custom planning logic is invoked during the query optimization and execution planning phase. 
- For a detailed documentation please see: [fn register_expr_planner](https://docs.rs/datafusion/latest/datafusion/execution/trait.FunctionRegistry.html#method.register_expr_planner) + For detailed documentation see: [fn register_expr_planner](https://docs.rs/datafusion/latest/datafusion/execution/trait.FunctionRegistry.html#method.register_expr_planner) See example below: From 76e8dd09c0e2d9d5e6cb14edcac553f6d18de5bb Mon Sep 17 00:00:00 2001 From: Jiashu Hu <61627210+Jiashu-Hu@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:58:27 -0500 Subject: [PATCH 7/9] Update docs/source/library-user-guide/adding-udfs.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 7f2f64254d1b..7c1d4a3df4ec 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1197,7 +1197,7 @@ See example below: # // Define the custom planner # struct MyCustomPlanner; -// Implement ExprPlanner for cutom operator logic +// Implement ExprPlanner to add support for the `->` custom operator impl ExprPlanner for MyCustomPlanner { fn plan_binary_op( &self, From a7f5000d7abe24fc2192036f93c69d75e353ef9d Mon Sep 17 00:00:00 2001 From: Jiashu Hu <61627210+Jiashu-Hu@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:09:30 -0500 Subject: [PATCH 8/9] Update adding-udfs.md added assert for CI process --- docs/source/library-user-guide/adding-udfs.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 7c1d4a3df4ec..87019eca7982 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1231,16 +1231,18 @@ async fn main() -> Result<()> { 
ctx.register_expr_planner(Arc::new(MyCustomPlanner))?; let results = ctx.sql("select 'foo'->'bar';").await?.collect().await?; + let expected = [ + "+----------------------------+", + "| Utf8(\"foo\") || Utf8(\"bar\") |", + "+----------------------------+", + "| foobar |", + "+----------------------------+", + ]; + assert_batches_eq!(&expected, &results); + pretty::print_batches(&results)?; Ok(()) } - -// "+----------------------------+", -// "| Utf8(\"foo\") || Utf8(\"bar\") |", -// "+----------------------------+", -// "| foobar |", -// "+----------------------------+", - ``` [1]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs From fa6d331d6cbda3849e70bc7c69d1a56ed5dfd9fe Mon Sep 17 00:00:00 2001 From: Jiashu-Hu Date: Mon, 24 Mar 2025 13:27:17 -0500 Subject: [PATCH 9/9] formatted language request by prettier --- docs/source/library-user-guide/adding-udfs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/adding-udfs.md b/docs/source/library-user-guide/adding-udfs.md index 87019eca7982..8fb8a59fb860 100644 --- a/docs/source/library-user-guide/adding-udfs.md +++ b/docs/source/library-user-guide/adding-udfs.md @@ -1162,7 +1162,7 @@ async fn main() -> Result<()> { ## Custom Expression Planning -DataFusion provides native support for common SQL operators by default such as `+`, `-`, `||`. However it does not provide support for other operators such as `@>`. To override DataFusion's default handling or support unsupported operators, developers can extend DataFusion by implementing custom expression planning, a core feature of DataFusion +DataFusion provides native support for common SQL operators by default such as `+`, `-`, `||`. However it does not provide support for other operators such as `@>`. 
To override DataFusion's default handling or to add support for unsupported operators, developers can extend DataFusion by implementing custom expression planning, a core feature of DataFusion. ### Implementing Custom Expression Planning