Skip to content

[enhance](nereids) add eliminate order by key by data trait #46225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
b22a436
add eliminate order by key by uniform
feiniaofeiafei Dec 31, 2024
2044dac
fix regression tpcds shape
feiniaofeiafei Jan 16, 2025
8bf251e
fix regression
feiniaofeiafei Jan 16, 2025
58af56f
add EliminateOrderByKey first commit
feiniaofeiafei Jan 21, 2025
f48fe7d
add uniform process for topn and sort, and eliminate order by a,a
feiniaofeiafei Jan 21, 2025
1bada3e
move eliminate sort key by uniform in rule eliminateOrderByKey
feiniaofeiafei Jan 21, 2025
825637a
add feut
feiniaofeiafei Jan 21, 2025
cc2bd5a
fix sort key order
feiniaofeiafei Jan 22, 2025
13500bb
fix fd eliminate
feiniaofeiafei Jan 22, 2025
dd6831a
fix regression
feiniaofeiafei Jan 22, 2025
19266c5
add nondeterministic process
feiniaofeiafei Jan 23, 2025
913c2f5
add skipDeleteBitmap process
feiniaofeiafei Jan 23, 2025
ff2481a
add test
feiniaofeiafei Jan 23, 2025
953db93
use nonfoldable, fix style
feiniaofeiafei Jan 23, 2025
057ef38
add regression variable log
feiniaofeiafei Jan 24, 2025
d8766b2
support window order by key
feiniaofeiafei Jan 24, 2025
a5cfd54
add window function case
feiniaofeiafei Jan 26, 2025
6979ea6
add test
feiniaofeiafei Jan 27, 2025
76edf05
fix
feiniaofeiafei Jan 27, 2025
3fa06ea
fix something about OnlyMetricTypeErrorMsg
feiniaofeiafei Feb 5, 2025
3505cf8
add equalset test
feiniaofeiafei Feb 5, 2025
d5d783c
add comment and remove useless code
feiniaofeiafei Feb 6, 2025
ac6dd2c
reduce loop order by key list
feiniaofeiafei Feb 7, 2025
abf0d22
reduce 2 loop
feiniaofeiafei Feb 7, 2025
5db1548
fix
feiniaofeiafei Feb 7, 2025
fd6fa17
change algorithm
feiniaofeiafei Feb 8, 2025
56826f5
reduce map
feiniaofeiafei Feb 8, 2025
8feb8f8
add test
feiniaofeiafei Feb 8, 2025
b0a5766
add test
feiniaofeiafei Feb 8, 2025
a1af3db
fix
feiniaofeiafei Feb 8, 2025
446e2db
add test
feiniaofeiafei Feb 12, 2025
87cc985
fix regression
feiniaofeiafei Feb 16, 2025
8ed9e50
fix feut
feiniaofeiafei Feb 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import org.apache.doris.nereids.rules.rewrite.EliminateNotNull;
import org.apache.doris.nereids.rules.rewrite.EliminateNullAwareLeftAntiJoin;
import org.apache.doris.nereids.rules.rewrite.EliminateOrderByConstant;
import org.apache.doris.nereids.rules.rewrite.EliminateOrderByKey;
import org.apache.doris.nereids.rules.rewrite.EliminateSemiJoin;
import org.apache.doris.nereids.rules.rewrite.EliminateSort;
import org.apache.doris.nereids.rules.rewrite.EliminateSortUnderSubqueryOrView;
Expand Down Expand Up @@ -352,7 +353,8 @@ public class Rewriter extends AbstractBatchJobExecutor {
),
// this rule should be invoked after ColumnPruning
custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new),

topic("Eliminate Order By Key",
topDown(new EliminateOrderByKey())),
topic("Eliminate GroupBy",
topDown(new EliminateGroupBy(),
new MergeAggregate(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@
* Function dependence items.
*/
public class FuncDeps {
static class FuncDepsItem {
final Set<Slot> determinants;
final Set<Slot> dependencies;
/** A single function dependence item: a set of determinant slots and the set of slots that depend on them. */
public static class FuncDepsItem {
public final Set<Slot> determinants;
public final Set<Slot> dependencies;

public FuncDepsItem(Set<Slot> determinants, Set<Slot> dependencies) {
this.determinants = ImmutableSet.copyOf(determinants);
Expand Down Expand Up @@ -64,16 +65,21 @@ public int hashCode() {
private final Set<FuncDepsItem> items;
// determinants -> dependencies
private final Map<Set<Slot>, Set<Set<Slot>>> edges;
// dependencies -> determinants
private final Map<Set<Slot>, Set<Set<Slot>>> redges;

/** Creates an empty collection: no items, no forward edges and no reverse edges. */
public FuncDeps() {
    this.items = new HashSet<>();
    // determinants -> dependencies
    this.edges = new HashMap<>();
    // dependencies -> determinants
    this.redges = new HashMap<>();
}

/**
 * Records the function dependence {@code determinants -> dependencies}.
 *
 * <p>The item is added to {@code items}, and the pair is indexed in both directions:
 * {@code edges} maps the determinants to their dependencies and {@code redges} maps the
 * dependencies back to their determinants.
 *
 * @param determinants slots on the determining side of the dependence
 * @param dependencies slots determined by {@code determinants}
 */
public void addFuncItems(Set<Slot> determinants, Set<Slot> dependencies) {
    // Snapshot the caller's sets once. They are used as hash-map keys (and stored inside hash
    // sets), so a later mutation by the caller would silently corrupt both indexes.
    // ImmutableSet.copyOf returns the same instance when the input is already immutable.
    Set<Slot> determinantKey = ImmutableSet.copyOf(determinants);
    Set<Slot> dependencyKey = ImmutableSet.copyOf(dependencies);
    items.add(new FuncDepsItem(determinantKey, dependencyKey));
    // computeIfAbsent returns the (possibly new) bucket, so no second lookup is needed.
    edges.computeIfAbsent(determinantKey, k -> new HashSet<>()).add(dependencyKey);
    redges.computeIfAbsent(dependencyKey, k -> new HashSet<>()).add(determinantKey);
}

public int size() {
Expand Down Expand Up @@ -185,6 +191,14 @@ public Set<FuncDeps.FuncDepsItem> getItems() {
return items;
}

/**
 * Returns the forward index: each determinant slot set mapped to the dependency slot sets
 * recorded for it. The internal map itself is returned; no copy is made.
 */
public Map<Set<Slot>, Set<Set<Slot>>> getEdges() {
return edges;
}

/**
 * Returns the reverse index: each dependency slot set mapped to the determinant slot sets
 * recorded for it. The internal map itself is returned; no copy is made.
 */
public Map<Set<Slot>, Set<Set<Slot>>> getREdges() {
return redges;
}

/**
* find the determinants of dependencies
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ public enum RuleType {
ELIMINATE_JOIN_BY_FK(RuleTypeClass.REWRITE),
ELIMINATE_GROUP_BY_KEY(RuleTypeClass.REWRITE),
ELIMINATE_GROUP_BY_KEY_BY_UNIFORM(RuleTypeClass.REWRITE),
ELIMINATE_ORDER_BY_KEY(RuleTypeClass.REWRITE),
ELIMINATE_FILTER_GROUP_BY_KEY(RuleTypeClass.REWRITE),
ELIMINATE_DEDUP_JOIN_CONDITION(RuleTypeClass.REWRITE),
ELIMINATE_NULL_AWARE_LEFT_ANTI_JOIN(RuleTypeClass.REWRITE),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.catalog.Type;
import org.apache.doris.nereids.annotation.DependsRules;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.properties.DataTrait;
import org.apache.doris.nereids.properties.FuncDeps;
import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.OrderExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.WindowExpression;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Eliminate redundant order by keys.
 * 1. eliminate duplicate keys
 * select a from t1 order by a, a;
 * ->
 * select a from t1 order by a;
 * 2. eliminate keys determined by function dependency
 * select a from t1 order by a, a+1;
 * select a from t1 order by a, abs(a);
 * select a from t1 where a=c order by a, c;
 * -> (in each query only "order by a" is kept)
 * select a from t1 order by a;
 * 3. eliminate uniform keys
 * select a,b,c from test where a=1 order by a;
 * ->
 * select a,b,c from test where a=1;
 */
@DependsRules({
NormalizeSort.class,
ExtractAndNormalizeWindowExpression.class,
CheckAndStandardizeWindowFunctionAndFrame.class})
public class EliminateOrderByKey implements RewriteRuleFactory {
/**
 * Builds the two rewrite rules of this factory, both registered under
 * {@code RuleType.ELIMINATE_ORDER_BY_KEY}: one matches a logical sort and delegates to
 * {@code eliminateSort}, the other matches a logical window and delegates to
 * {@code eliminateWindow}.
 */
@Override
public List<Rule> buildRules() {
return ImmutableList.of(
logicalSort(any()).then(EliminateOrderByKey::eliminateSort).toRule(RuleType.ELIMINATE_ORDER_BY_KEY),
logicalWindow(any()).then(EliminateOrderByKey::eliminateWindow)
Comment on lines +71 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
logicalSort(any()).then(EliminateOrderByKey::eliminateSort).toRule(RuleType.ELIMINATE_ORDER_BY_KEY),
logicalWindow(any()).then(EliminateOrderByKey::eliminateWindow)
logicalSort().then(EliminateOrderByKey::eliminateSort).toRule(RuleType.ELIMINATE_ORDER_BY_KEY),
logicalWindow().then(EliminateOrderByKey::eliminateWindow)

.toRule(RuleType.ELIMINATE_ORDER_BY_KEY));
}

/**
 * Prunes redundant ORDER BY keys from every window expression of {@code window}, using the
 * data trait of the window's child.
 *
 * @param window the window node; each of its window expressions is cast to an {@link Alias}
 *               whose child is cast to a {@link WindowExpression}
 * @return a window rebuilt with the pruned expressions, or {@code window} itself when no
 *         expression lost a key
 * @throws AnalysisException if any order key of a window expression has an only-metric type
 */
private static Plan eliminateWindow(LogicalWindow<Plan> window) {
    DataTrait childTrait = window.child().getLogicalProperties().getTrait();
    List<NamedExpression> rewrittenExprs = new ArrayList<>();
    boolean anyRewritten = false;
    for (NamedExpression namedExpr : window.getWindowExpressions()) {
        Alias windowAlias = (Alias) namedExpr;
        WindowExpression windowExpr = (WindowExpression) windowAlias.child();
        List<OrderExpression> originalOrder = windowExpr.getOrderKeys();

        // Reject metric-only types before touching the keys.
        for (OrderExpression orderExpr : originalOrder) {
            if (orderExpr.getDataType().isOnlyMetricType()) {
                throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
            }
        }

        // Unwrap the order expressions into plain order keys for the shared elimination routine.
        List<OrderKey> candidateKeys = new ArrayList<>();
        for (OrderExpression orderExpr : originalOrder) {
            candidateKeys.add(orderExpr.getOrderKey());
        }
        List<OrderKey> keptKeys = eliminate(childTrait, candidateKeys);

        if (keptKeys.size() != candidateKeys.size()) {
            // At least one key was dropped: rebuild the window expression under the same alias.
            anyRewritten = true;
            List<OrderExpression> prunedOrder = new ArrayList<>();
            for (OrderKey kept : keptKeys) {
                prunedOrder.add(new OrderExpression(kept));
            }
            WindowExpression prunedWindowExpr = windowExpr.withOrderKeys(prunedOrder);
            rewrittenExprs.add(windowAlias.withChildren(ImmutableList.of(prunedWindowExpr)));
        } else {
            // Nothing was dropped: keep the original expression instance.
            rewrittenExprs.add(namedExpr);
        }
    }
    if (!anyRewritten) {
        return window;
    }
    return window.withExpressionsAndChild(rewrittenExprs, window.child());
}

/**
 * Prunes redundant ORDER BY keys from {@code sort}, using the data trait of the sort's child.
 *
 * @param sort the sort node to simplify
 * @return the child plan when no key survives, {@code sort} itself when the number of keys is
 *         unchanged, otherwise a copy of {@code sort} with only the surviving keys
 */
private static Plan eliminateSort(LogicalSort<Plan> sort) {
    DataTrait childTrait = sort.child().getLogicalProperties().getTrait();
    List<OrderKey> originalKeys = sort.getOrderKeys();
    List<OrderKey> keptKeys = eliminate(childTrait, originalKeys);

    // No key left: the sort node is removed entirely.
    if (keptKeys.isEmpty()) {
        return sort.child();
    }
    // Same number of keys: nothing was eliminated, keep the original node.
    if (keptKeys.size() == originalKeys.size()) {
        return sort;
    }
    return sort.withOrderKeys(keptKeys);
}

/**
 * Computes the order keys that must be kept, dropping keys that are redundant given the keys
 * that precede them.
 *
 * <p>If any key's expression is not a {@link Slot}, the input list is returned unchanged.
 * Otherwise the keys are scanned in order and a key is dropped when:
 * <ul>
 *   <li>its expression is already in the accumulated set of processed expressions (which also
 *       holds whatever {@code calEqualSet} returned for each of them);</li>
 *   <li>{@code dataTrait.isUniformAndNotNull} holds for it; or</li>
 *   <li>the reverse edges contain a determinant set for it that is fully contained in the
 *       accumulated set.</li>
 * </ul>
 *
 * @param dataTrait trait used for the equal-set, uniform and function-dependence lookups
 * @param inputOrderKeys order keys in their original order
 * @return {@code inputOrderKeys} itself when a non-slot key is present, otherwise a new list
 *         with the retained keys in their original relative order
 */
private static List<OrderKey> eliminate(DataTrait dataTrait, List<OrderKey> inputOrderKeys) {
// Pass 1: bail out on any non-slot key; otherwise gather every key slot plus its equal set.
Set<Slot> validSlots = new HashSet<>();
for (OrderKey inputOrderKey : inputOrderKeys) {
Expression expr = inputOrderKey.getExpr();
if (!(expr instanceof Slot)) {
return inputOrderKeys;
}
validSlots.add((Slot) expr);
validSlots.addAll(dataTrait.calEqualSet((Slot) expr));
}
// NOTE(review): presumably restricts the function dependencies to the gathered slots;
// confirm against DataTrait#getAllValidFuncDeps.
FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(validSlots);
// Reverse index: dependency slot set -> determinant slot sets.
Map<Set<Slot>, Set<Set<Slot>>> redges = funcDeps.getREdges();

// Pass 2: walk the keys in order. orderExprWithEqualSet accumulates every processed key
// expression (retained or dropped as uniform) together with its equal set.
List<OrderKey> retainExpression = new ArrayList<>();
Set<Expression> orderExprWithEqualSet = new HashSet<>();
for (OrderKey inputOrderKey : inputOrderKeys) {
Expression expr = inputOrderKey.getExpr();
// eliminate by duplicate
if (orderExprWithEqualSet.contains(expr)) {
continue;
}
// eliminate by uniform
// The cast is safe: pass 1 returned early unless every key expression is a Slot.
if (dataTrait.isUniformAndNotNull((Slot) expr)) {
// The key is dropped but still recorded, so later keys can be matched against it.
orderExprWithEqualSet.add(expr);
orderExprWithEqualSet.addAll(dataTrait.calEqualSet((Slot) expr));
continue;
}
// eliminate by fd
Set<Slot> set = ImmutableSet.of((Slot) expr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about expr is a + 1, and can't be located in redges? Is there necessary to extract all referenced column slot, etc?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or the expansion should do in validSlots collection?

boolean shouldRetain = true;
// Drop the key if any of its determinant sets is fully contained in the accumulated set.
if (redges.containsKey(set)) {
Set<Set<Slot>> dominants = redges.get(set);
for (Set<Slot> dominant : dominants) {
if (orderExprWithEqualSet.containsAll(dominant)) {
shouldRetain = false;
break;
}
}
}
if (!shouldRetain) {
continue;
}
// The key survives: keep it and record it with its equal set for the following keys.
retainExpression.add(inputOrderKey);
orderExprWithEqualSet.add(expr);
orderExprWithEqualSet.addAll(dataTrait.calEqualSet((Slot) expr));
}
return retainExpression;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.doris.nereids.trees.plans.logical;

import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MTMV;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Table;
Expand All @@ -36,6 +37,7 @@
import org.apache.doris.nereids.trees.plans.algebra.OlapScan;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.qe.ConnectContext;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
Expand Down Expand Up @@ -536,6 +538,12 @@ AGGREGATE KEY (siteid,citycode,username)
builder.addUniqueSlot(originalPlan.getLogicalProperties().getTrait());
builder.replaceUniqueBy(constructReplaceMap(mtmv));
} else if (getTable().getKeysType().isAggregationFamily() && !getTable().isRandomDistribution()) {
// When skipDeleteBitmap is set to true, in the unique model, rows that are replaced due to having the same
// unique key will also be read. As a result, the uniqueness of the unique key cannot be guaranteed.
if (ConnectContext.get().getSessionVariable().skipDeleteBitmap
Copy link
Contributor

@xzj7019 xzj7019 Feb 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the story here? and pls add comments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

&& getTable().getKeysType() == KeysType.UNIQUE_KEYS) {
return;
}
ImmutableSet.Builder<Slot> uniqSlots = ImmutableSet.builderWithExpectedSize(outputSet.size());
for (Slot slot : outputSet) {
if (!(slot instanceof SlotReference)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,14 @@ public void computeEqualSet(DataTrait.Builder builder) {
public void computeFd(DataTrait.Builder builder) {
builder.addFuncDepsDG(child().getLogicalProperties().getTrait());
for (NamedExpression expr : getProjects()) {
if (!expr.isSlot()) {
builder.addDeps(expr.getInputSlots(), ImmutableSet.of(expr.toSlot()));
if (!(expr instanceof Alias)) {
continue;
}
// Skip non-foldable expressions such as a + random(1, 10); otherwise a false dependency with determinant a and dependency a + random(1, 10) would be added.
if (expr.containsNonfoldable()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pls add comments and cases to explain the handling logic

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

continue;
}
builder.addDeps(expr.getInputSlots(), ImmutableSet.of(expr.toSlot()));
}
}
}
Loading