@@ -1233,10 +1233,38 @@ impl PhysicalPlanner {
1233
1233
) -> Result < Arc < dyn AggregateExpr > , ExecutionError > {
1234
1234
match spark_expr. expr_struct . as_ref ( ) . unwrap ( ) {
1235
1235
AggExprStruct :: Count ( expr) => {
1236
- let if_expr = self . convert_count_to_if ( & expr. children , schema. clone ( ) ) ?;
1236
+ assert ! ( !expr. children. is_empty( ) ) ;
1237
+ // Using `count_udaf` from Comet is exceptionally slow for some reason, so
1238
+ // as a workaround we translate it to `SUM(IF(expr IS NOT NULL, 1, 0))`
1239
+ // https://github.com/apache/datafusion-comet/issues/744
1240
+
1241
+ let children = expr
1242
+ . children
1243
+ . iter ( )
1244
+ . map ( |child| self . create_expr ( child, schema. clone ( ) ) )
1245
+ . collect :: < Result < Vec < _ > , _ > > ( ) ?;
1246
+
1247
+ // create `IS NOT NULL expr` and join them with `AND` if there are multiple
1248
+ let not_null_expr: Arc < dyn PhysicalExpr > = children. iter ( ) . skip ( 1 ) . fold (
1249
+ Arc :: new ( IsNotNullExpr :: new ( children[ 0 ] . clone ( ) ) ) as Arc < dyn PhysicalExpr > ,
1250
+ |acc, child| {
1251
+ Arc :: new ( BinaryExpr :: new (
1252
+ acc,
1253
+ DataFusionOperator :: And ,
1254
+ Arc :: new ( IsNotNullExpr :: new ( child. clone ( ) ) ) ,
1255
+ ) )
1256
+ } ,
1257
+ ) ;
1258
+
1259
+ let child = Arc :: new ( IfExpr :: new (
1260
+ not_null_expr,
1261
+ Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ) ,
1262
+ Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 0 ) ) ) ) ,
1263
+ ) ) ;
1264
+
1237
1265
create_aggregate_expr (
1238
1266
& sum_udaf ( ) ,
1239
- & [ if_expr ] ,
1267
+ & [ child ] ,
1240
1268
& [ ] ,
1241
1269
& [ ] ,
1242
1270
& [ ] ,
@@ -1491,40 +1519,6 @@ impl PhysicalPlanner {
1491
1519
}
1492
1520
}
1493
1521
1494
- fn convert_count_to_if (
1495
- & self ,
1496
- children : & [ Expr ] ,
1497
- schema : Arc < Schema > ,
1498
- ) -> Result < Arc < dyn PhysicalExpr > , ExecutionError > {
1499
- assert ! ( !children. is_empty( ) , "Children should not be empty" ) ;
1500
-
1501
- // Translate `COUNT` to `SUM(IF(expr IS NOT NULL, 1, 0))`
1502
- let children_exprs = children
1503
- . iter ( )
1504
- . map ( |child| self . create_expr ( child, schema. clone ( ) ) )
1505
- . collect :: < Result < Vec < _ > , _ > > ( ) ?;
1506
-
1507
- // Create `IS NOT NULL expr` and combine with `AND` for multiple children
1508
- let not_null_expr = children_exprs. iter ( ) . skip ( 1 ) . fold (
1509
- Arc :: new ( IsNotNullExpr :: new ( children_exprs[ 0 ] . clone ( ) ) ) as Arc < dyn PhysicalExpr > ,
1510
- |acc, child| {
1511
- Arc :: new ( BinaryExpr :: new (
1512
- acc,
1513
- DataFusionOperator :: And ,
1514
- Arc :: new ( IsNotNullExpr :: new ( child. clone ( ) ) ) ,
1515
- ) )
1516
- } ,
1517
- ) ;
1518
-
1519
- let if_expr = Arc :: new ( IfExpr :: new (
1520
- not_null_expr,
1521
- Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ) ,
1522
- Arc :: new ( Literal :: new ( ScalarValue :: Int64 ( Some ( 0 ) ) ) ) ,
1523
- ) ) ;
1524
-
1525
- Ok ( if_expr)
1526
- }
1527
-
1528
1522
/// Create a DataFusion windows physical expression from Spark physical expression
1529
1523
fn create_window_expr < ' a > (
1530
1524
& ' a self ,
@@ -1646,8 +1640,12 @@ impl PhysicalPlanner {
1646
1640
) -> Result < ( String , Vec < Arc < dyn PhysicalExpr > > ) , ExecutionError > {
1647
1641
match & agg_func. expr_struct {
1648
1642
Some ( AggExprStruct :: Count ( expr) ) => {
1649
- let if_expr = self . convert_count_to_if ( & expr. children , schema. clone ( ) ) ?;
1650
- Ok ( ( "count" . to_string ( ) , vec ! [ if_expr] ) )
1643
+ let children = expr
1644
+ . children
1645
+ . iter ( )
1646
+ . map ( |child| self . create_expr ( child, schema. clone ( ) ) )
1647
+ . collect :: < Result < Vec < _ > , _ > > ( ) ?;
1648
+ Ok ( ( "count" . to_string ( ) , children) )
1651
1649
}
1652
1650
Some ( AggExprStruct :: Min ( expr) ) => {
1653
1651
let child = self . create_expr ( expr. child . as_ref ( ) . unwrap ( ) , schema. clone ( ) ) ?;
0 commit comments