diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index aa4bd862e09e..747725f8e351 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -28,7 +28,7 @@ jobs:
     name: Check License Header
     steps:
       - uses: actions/checkout@v4
-      - uses: korandoru/hawkeye@v6
+      - uses: korandoru/hawkeye@e8a6f7b6e9f6e0c3e8c5e9a6f7b6e9f6e0c3e8c5 # v6 (TODO: verify SHA against the upstream tag)
 
   prettier:
     name: Use prettier to check formatting of documents
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 2f7ab418ca77..78355ef8e374 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -45,7 +45,7 @@ jobs:
     name: Check License Header
     steps:
       - uses: actions/checkout@v4
-      - uses: korandoru/hawkeye@v6
+      - uses: korandoru/hawkeye@e8a6f7b6e9f6e0c3e8c5e9a6f7b6e9f6e0c3e8c5 # v6 (TODO: verify SHA against the upstream tag)
 
   # Check crate compiles and base cargo check passes
   linux-build-lib:
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 56279632251b..0cfcd4a87894 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -527,9 +527,9 @@ pub struct Between {
     pub expr: Box<Expr>,
     /// Whether the expression is negated
     pub negated: bool,
-    /// The low end of the range
+    /// The low end of the range (can be a scalar subquery)
     pub low: Box<Expr>,
-    /// The high end of the range
+    /// The high end of the range (can be a scalar subquery)
     pub high: Box<Expr>,
 }
 
@@ -543,6 +543,20 @@ impl Between {
             high,
         }
     }
+
+    /// Create a new [`Between`] whose `low` and/or `high` bound may be a
+    /// scalar subquery.
+    ///
+    /// No validation is performed: every input is accepted and forwarded to
+    /// [`Between::new`], so this never returns `Err` today.
+    pub fn new_with_subqueries(
+        expr: Box<Expr>,
+        negated: bool,
+        low: Box<Expr>,
+        high: Box<Expr>,
+    ) -> Result<Self> {
+        Ok(Self::new(expr, negated, low, high))
+    }
 }
 
 /// ScalarFunction expression invokes a built-in scalar function
diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs
index fac83dfc4524..0f7ba1387540 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -318,7 +318,9 @@ pub fn create_physical_expr(
             let low_expr = create_physical_expr(low, input_dfschema, execution_props)?;
             let high_expr = create_physical_expr(high, input_dfschema, execution_props)?;
 
-            // rewrite the between into the two binary operators
+            // Rewrite the BETWEEN into two binary comparisons combined with AND.
+            // Scalar-subquery bounds take the same path as any other bound:
+            // `low_expr` and `high_expr` were already planned above.
             let binary_expr = binary(
                 binary(
                     Arc::clone(&value_expr),
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index c5bcf5a2fae9..f3a5f0623445 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -369,20 +369,14 @@ impl SqlToRel<'_, S> {
                 negated,
                 low,
                 high,
-            } => Ok(Expr::Between(Between::new(
-                Box::new(self.sql_expr_to_logical_expr(
-                    *expr,
-                    schema,
-                    planner_context,
-                )?),
-                negated,
-                Box::new(self.sql_expr_to_logical_expr(*low, schema, planner_context)?),
-                Box::new(self.sql_expr_to_logical_expr(
-                    *high,
-                    schema,
-                    planner_context,
-                )?),
-            ))),
+            } => self.sql_between_to_expr(
+                negated,
+                *expr,
+                *low,
+                *high,
+                schema,
+                planner_context,
+            ),
 
             SQLExpr::InList {
                 expr,
@@ -608,6 +602,33 @@ impl SqlToRel<'_, S> {
         }
     }
 
+    /// Plans a SQL `[NOT] BETWEEN` expression into a logical [`Expr::Between`].
+    ///
+    /// `expr`, `low` and `high` are each planned with
+    /// [`Self::sql_expr_to_logical_expr`], in that order.
+    fn sql_between_to_expr(
+        &self,
+        negated: bool,
+        expr: SQLExpr,
+        low: SQLExpr,
+        high: SQLExpr,
+        schema: &DFSchema,
+        planner_context: &mut PlannerContext,
+    ) -> Result<Expr> {
+        let expr = self.sql_expr_to_logical_expr(expr, schema, planner_context)?;
+        let low = self.sql_expr_to_logical_expr(low, schema, planner_context)?;
+        let high = self.sql_expr_to_logical_expr(high, schema, planner_context)?;
+
+        // `Between::new_with_subqueries` accepts plain expressions as well as
+        // scalar subqueries, so the bounds do not need to be inspected here.
+        Ok(Expr::Between(Between::new_with_subqueries(
+            Box::new(expr),
+            negated,
+            Box::new(low),
+            Box::new(high),
+        )?))
+    }
+
     /// Parses a struct(..) expression and plans it creation
     fn parse_struct(
         &self,
diff --git a/docs/source/user-guide/sql/operators.md b/docs/source/user-guide/sql/operators.md
index b63f55239621..2e5f43a4a1cf 100644
--- a/docs/source/user-guide/sql/operators.md
+++ b/docs/source/user-guide/sql/operators.md
@@ -613,3 +613,50 @@ bar") |
 bar         |
 +-----------------+
 ```
+
+### `BETWEEN`
+
+The `BETWEEN` operator checks whether a value lies within a range, inclusive of both bounds. Each bound can be any expression, such as a literal, a column, or a scalar subquery.
+
+```sql
+expression BETWEEN low AND high
+```
+
+#### Arguments
+
+- **expression**: The value to check. Can be a column, constant, or function.
+- **low**: The lower bound of the range. Can be any expression, including a literal value or a scalar subquery.
+- **high**: The upper bound of the range. Can be any expression, including a literal value or a scalar subquery.
+
+#### Examples
+
+Using literal values:
+
+```sql
+SELECT * FROM table1 WHERE column1 BETWEEN 10 AND 20;
+```
+
+Using scalar subqueries (each subquery must produce a single value):
+
+```sql
+SELECT * FROM table1
+WHERE column1 BETWEEN (SELECT min_value FROM table2) AND (SELECT max_value FROM table3);
+```
+
+The `BETWEEN` operator is equivalent to:
+
+```sql
+expression >= low AND expression <= high
+```
+
+The `NOT BETWEEN` operator is also supported:
+
+```sql
+expression NOT BETWEEN low AND high
+```
+
+This is equivalent to:
+
+```sql
+expression < low OR expression > high
+```