From e78d2f63c83058003cbf18cafea1f5bc0c8e09aa Mon Sep 17 00:00:00 2001 From: stephen <91597003+stephen-shelby@users.noreply.github.com> Date: Tue, 17 Dec 2024 19:26:34 +0800 Subject: [PATCH] [Enhancement] Disable extract agg columns on complex and json type columns for pruning columns (#53991) Signed-off-by: stephen --- .../rule/tree/ExtractAggregateColumn.java | 15 +++++++++++++-- .../connector/parser/trino/TrinoQueryTest.java | 2 +- .../com/starrocks/sql/plan/AggregateTest.java | 4 ++-- .../com/starrocks/sql/plan/ArrayTypeTest.java | 9 +++++++-- .../com/starrocks/sql/plan/ExpressionTest.java | 8 +++++--- 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/tree/ExtractAggregateColumn.java b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/tree/ExtractAggregateColumn.java index 6302dcb56ddb4..f618639a61ea5 100644 --- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/tree/ExtractAggregateColumn.java +++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/tree/ExtractAggregateColumn.java @@ -20,7 +20,6 @@ import com.starrocks.sql.optimizer.OptExpressionVisitor; import com.starrocks.sql.optimizer.base.ColumnRefFactory; import com.starrocks.sql.optimizer.base.ColumnRefSet; -import com.starrocks.sql.optimizer.operator.OperatorType; import com.starrocks.sql.optimizer.operator.Projection; import com.starrocks.sql.optimizer.operator.physical.PhysicalHashAggregateOperator; import com.starrocks.sql.optimizer.operator.scalar.CallOperator; @@ -69,6 +68,18 @@ private boolean hasNonColumnRefParameter(PhysicalHashAggregateOperator aggregate return false; } + private boolean hasComplexOrJsonTypeColumn(ScalarOperator operator) { + if (operator.isColumnRef() && (operator.getType().isComplexType() || operator.getType().isJsonType())) { + return true; + } + for (ScalarOperator child : operator.getChildren()) { + if (hasComplexOrJsonTypeColumn(child)) { + return true; + } + } + return false; + } + private void rewriteAggregateOperator(PhysicalHashAggregateOperator aggregateOperator, Projection projection) { Map columnRefMap = projection.getColumnRefMap(); Map rewriteMap = Maps.newHashMap(); @@ -92,7 +103,7 @@ private void rewriteAggregateOperator(PhysicalHashAggregateOperator aggregateOpe return; } if (!scalarOperator.isColumnRef() && !hasDictMappingOperator(scalarOperator) && - !(scalarOperator.getOpType() == OperatorType.SUBFIELD)) { + !hasComplexOrJsonTypeColumn(scalarOperator)) { rewriteMap.put(childRef, scalarOperator); extractedColumns.add(childRef); } diff --git a/fe/fe-core/src/test/java/com/starrocks/connector/parser/trino/TrinoQueryTest.java b/fe/fe-core/src/test/java/com/starrocks/connector/parser/trino/TrinoQueryTest.java index c315351fada9e..5ec96e6cd1440 100644 --- a/fe/fe-core/src/test/java/com/starrocks/connector/parser/trino/TrinoQueryTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/connector/parser/trino/TrinoQueryTest.java @@ -447,7 +447,7 @@ public void testSelectMap() throws Exception { sql = "select avg(c1[1]) from test_map where c1[1] is not null"; assertPlanContains(sql, "2:AGGREGATE (update finalize)\n" + - " | output: avg(2: c1[1])"); + " | output: avg(5: expr)"); sql = "select c2[2][1] from test_map"; assertPlanContains(sql, " : 3: c2[2][1]"); diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/AggregateTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/AggregateTest.java index b331e08dcaa0b..f6d92e1a5e985 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/AggregateTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/AggregateTest.java @@ -1658,11 +1658,11 @@ public void testAggregateDuplicatedExprs() throws Exception { "sum(arrays_overlap(v3, [1])) as q2, " + "sum(arrays_overlap(v3, [1])) as q3 FROM tarray;"); assertContains(plan, " 2:AGGREGATE (update finalize)\n" + - " | output: sum(arrays_overlap(3: v3, CAST([1] AS ARRAY)))\n" + + " | output: sum(4: arrays_overlap)\n" + " | group by: \n" + " | \n" + " 1:Project\n" + - " | : 3: v3\n" + + " | : arrays_overlap(3: v3, CAST([1] AS ARRAY))\n" + " | \n" + " 0:OlapScanNode"); } diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java index 016226fd09485..f1924df078f18 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ArrayTypeTest.java @@ -738,7 +738,12 @@ public void testCountDistinctLambdaGlobalAgg() throws Exception { "count(distinct array_length(array_map(x -> x + 1, d_2))) from adec"; String plan = getFragmentPlan(sql); assertCContains(plan, " 2:AGGREGATE (update finalize)\n" + - " | output: multi_distinct_count(array_length(array_map" + - "( -> CAST( AS DECIMAL64(13,3)) + 1, 5: d_2)))"); + " | output: multi_distinct_count(11: array_length)\n" + + " | group by: \n" + + " | \n" + + " 1:Project\n" + + " | : array_length(array_map( -> CAST( AS DECIMAL64(13,3)) + 1, 5: d_2))\n" + + " | \n" + + " 0:OlapScanNode"); } } diff --git a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ExpressionTest.java b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ExpressionTest.java index e18e672ec4349..ca94f374b6b89 100644 --- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/ExpressionTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/ExpressionTest.java @@ -728,9 +728,11 @@ public void testLambdaWithAggAndWindowFunctions() throws Exception { String sql = "select array_agg(array_length(array_map(x->x*2, c2))) from test_array12"; String plan = getFragmentPlan(sql); Assert.assertTrue(plan.contains(" 2:AGGREGATE (update finalize)\n" + - " | output: array_agg(array_length(array_map( -> CAST( AS BIGINT) * 2, 3: c2)))")); - Assert.assertTrue(plan.contains(" 1:Project\n" + - " | : 3: c2")); + " | output: array_agg(5: array_length)\n" + + " | group by: \n" + + " | \n" + + " 1:Project\n" + + " | : array_length(array_map( -> CAST( AS BIGINT) * 2, 3: c2))")); sql = "select array_map(x->x > count(c1), c2) from test_array12 group by c2"; plan = getFragmentPlan(sql);