peng - isolate the fix logic to its own visitor class

RyanL1997 · RyanL1997 · commit 3dfd44b476c2 · 2025-12-01T12:02:21.000-08:00
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java
@@ -8,7 +8,6 @@
 import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
 
 import java.sql.Connection;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -62,12 +61,6 @@ public class CalcitePlanContext {
 
   @Getter public Map<String, RexLambdaRef> rexLambdaRefMap;
 
-  /** Accumulated filter conditions to prevent deep Filter node chains */
-  private final List<RexNode> pendingFilterConditions = new ArrayList<>();
-
-  /** Flag to indicate if filter accumulation mode is active */
-  @Getter @Setter private boolean filterAccumulationEnabled = false;
-
   private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType queryType) {
     this.config = config;
     this.sysLimit = sysLimit;
@@ -141,52 +134,4 @@ public static boolean isLegacyPreferred() {
   public void putRexLambdaRefMap(Map<String, RexLambdaRef> candidateMap) {
     this.rexLambdaRefMap.putAll(candidateMap);
   }
-
-  /**
-   * Adds a filter condition to the accumulation list instead of creating immediate Filter RelNode.
-   * This prevents deep Filter node chains that cause memory explosion.
-   */
-  public void addFilterCondition(RexNode condition) {
-    pendingFilterConditions.add(condition);
-  }
-
-  /**
-   * Applies all accumulated filter conditions as a single Filter RelNode with AND operations. This
-   * creates a single Filter node instead of a deep chain of Filter nodes.
-   */
-  public void flushFilterConditions() {
-    if (pendingFilterConditions.isEmpty()) {
-      return;
-    }
-
-    if (pendingFilterConditions.size() == 1) {
-      relBuilder.filter(pendingFilterConditions.get(0));
-    } else {
-      // Combine all filter conditions with AND
-      RexNode combinedCondition = relBuilder.and(pendingFilterConditions);
-      relBuilder.filter(combinedCondition);
-    }
-    pendingFilterConditions.clear();
-  }
-
-  /**
-   * Enables filter accumulation mode to prevent deep Filter node chains. Should be called before
-   * processing multiple filter operations.
-   */
-  public void enableFilterAccumulation() {
-    filterAccumulationEnabled = true;
-  }
-
-  /**
-   * Disables filter accumulation mode. Should be called after processing multiple filter
-   * operations.
-   */
-  public void disableFilterAccumulation() {
-    filterAccumulationEnabled = false;
-  }
-
-  /** Returns true if there are pending filter conditions that need to be flushed. */
-  public boolean hasPendingFilterConditions() {
-    return !pendingFilterConditions.isEmpty();
-  }
 }
diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
@@ -176,31 +176,13 @@ public CalciteRelNodeVisitor(DataSourceService dataSourceService) {
   }
 
   public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) {
-    // Enable filter accumulation if this plan contains multiple filtering operations
-    // that could create deep Filter RelNode chains
-    if (countFilteringOperations(unresolved) >= 2) {
-      context.enableFilterAccumulation();
-      try {
-        unresolved.accept(this, context);
-        context.flushFilterConditions();
-        return context.relBuilder.peek();
-      } finally {
-        context.disableFilterAccumulation();
-      }
-    } else {
-      return unresolved.accept(this, context);
-    }
-  }
+    // Build the RelNode tree; stacked filter commands may leave a deep LogicalFilter chain.
+    RelNode relNode = unresolved.accept(this, context);
 
-  /**
-   * Flushes accumulated filter conditions before schema-changing operations. This prevents
-   * RexInputRef index mismatches that occur when filters reference field indices from the old
-   * schema.
-   */
-  private void flushFiltersBeforeSchemaChange(CalcitePlanContext context) {
-    if (context.isFilterAccumulationEnabled() && context.hasPendingFilterConditions()) {
-      context.flushFilterConditions();
-    }
+    // Post-process: merge consecutive LogicalFilter nodes to prevent OOM with deep chains.
+    // NOTE(review): only the returned tree is merged; context.relBuilder is left unchanged.
+    FilterMergeVisitor filterMergeVisitor = new FilterMergeVisitor();
+    return relNode.accept(filterMergeVisitor);
   }
 
   @Override
@@ -268,12 +250,7 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) {
       context.relBuilder.filter(ImmutableList.of(v.get().id), condition);
       context.popCorrelVar();
     } else {
-      // Use filter accumulation to prevent deep Filter node chains
-      if (context.isFilterAccumulationEnabled()) {
-        context.addFilterCondition(condition);
-      } else {
-        context.relBuilder.filter(condition);
-      }
+      context.relBuilder.filter(condition);
     }
     return context.relBuilder.peek();
   }
@@ -322,20 +299,13 @@ public RelNode visitRegex(Regex node, CalcitePlanContext context) {
       regexCondition = context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, regexCondition);
     }
 
-    // Use filter accumulation to prevent deep Filter node chains
-    if (context.isFilterAccumulationEnabled()) {
-      context.addFilterCondition(regexCondition);
-    } else {
-      context.relBuilder.filter(regexCondition);
-    }
+    context.relBuilder.filter(regexCondition);
     return context.relBuilder.peek();
   }
 
   public RelNode visitRex(Rex node, CalcitePlanContext context) {
     visitChildren(node, context);
 
-    flushFiltersBeforeSchemaChange(context);
-
     RexNode fieldRex = rexVisitor.analyze(node.getField(), context);
     String patternStr = (String) node.getPattern().getValue();
 
@@ -420,8 +390,6 @@ private boolean containsSubqueryExpression(Node expr) {
   public RelNode visitProject(Project node, CalcitePlanContext context) {
     visitChildren(node, context);
 
-    flushFiltersBeforeSchemaChange(context);
-
     if (isSingleAllFieldsProject(node)) {
       return handleAllFieldsProject(node, context);
     }
@@ -736,8 +704,6 @@ public RelNode visitReverse(
   public RelNode visitBin(Bin node, CalcitePlanContext context) {
     visitChildren(node, context);
 
-    flushFiltersBeforeSchemaChange(context);
-
     RexNode fieldExpr = rexVisitor.analyze(node.getField(), context);
     String fieldName = BinUtils.extractFieldName(node);
 
@@ -752,7 +718,6 @@ public RelNode visitBin(Bin node, CalcitePlanContext context) {
   @Override
   public RelNode visitParse(Parse node, CalcitePlanContext context) {
     visitChildren(node, context);
-    flushFiltersBeforeSchemaChange(context);
     buildParseRelNode(node, context);
     return context.relBuilder.peek();
   }
@@ -900,8 +865,6 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) {
   public RelNode visitEval(Eval node, CalcitePlanContext context) {
     visitChildren(node, context);
 
-    flushFiltersBeforeSchemaChange(context);
-
     node.getExpressionList()
         .forEach(
             expr -> {
@@ -1171,9 +1134,6 @@ private Pair<List<RexNode>, List<AggCall>> resolveAttributesForAggregation(
   /** Visits an aggregation for stats command */
   @Override
   public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) {
-    // Flush accumulated filter conditions before schema-changing aggregation operations
-    flushFiltersBeforeSchemaChange(context);
-
     Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList());
     Boolean bucketNullable = (Boolean) statsArgs.get(Argument.BUCKET_NULLABLE).getValue();
     int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 1 : 0);
@@ -2292,26 +2252,11 @@ private RelNode mergeTableAndResolveColumnConflict(
   @Override
   public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) {
     List<RelNode> subsearchNodes = new ArrayList<>();
-    // Save the current filter accumulation state - we'll process each subsearch independently
-    boolean wasFilterAccumulationEnabled = context.isFilterAccumulationEnabled();
 
     for (UnresolvedPlan subsearch : node.getSubsearches()) {
       UnresolvedPlan prunedSubSearch = subsearch.accept(new EmptySourcePropagateVisitor(), null);
-
-      // Temporarily disable filter accumulation so each subsearch gets its own independent
-      // lifecycle via analyze(). This prevents filter state from bleeding across branches.
-      if (wasFilterAccumulationEnabled) {
-        context.disableFilterAccumulation();
-      }
-
-      // Use analyze() to let each subsearch determine its own filter accumulation needs
       analyze(prunedSubSearch, context);
       subsearchNodes.add(context.relBuilder.build());
-
-      // Restore filter accumulation state for the next iteration
-      if (wasFilterAccumulationEnabled) {
-        context.enableFilterAccumulation();
-      }
     }
 
     // Use shared schema merging logic that handles type conflicts via field renaming
@@ -3302,82 +3247,4 @@ private RexNode createOptimizedTransliteration(
       throw new RuntimeException("Failed to optimize sed expression: " + sedExpression, e);
     }
   }
-
-  /**
-   * Counts the number of filtering operations in an UnresolvedPlan tree that would create Filter
-   * RelNodes. This is used to detect queries with multiple regex/filter operations that could cause
-   * deep Filter RelNode chains and memory exhaustion.
-   *
-   * <p>Stops counting at schema-changing operations (like Aggregation, Project with computed
-   * expressions) to avoid enabling filter accumulation across schema boundaries, which would cause
-   * RexInputRef index mismatches.
-   *
-   * @param plan the UnresolvedPlan to analyze
-   * @return the count of filtering operations found before the first schema-changing operation
-   */
-  private int countFilteringOperations(UnresolvedPlan plan) {
-    if (plan == null) {
-      return 0;
-    }
-
-    int count = 0;
-
-    // Count this node if it's a filtering operation
-    // BUT: Don't count Filter nodes that contain function calls, as they can cause
-    // type mismatches when accumulated and flushed later
-    if (plan instanceof Regex) {
-      count = 1;
-    } else if (plan instanceof Filter) {
-      Filter filterNode = (Filter) plan;
-      if (!containsFunctionCall(filterNode.getCondition())) {
-        count = 1;
-      }
-    }
-
-    // Stop counting at schema-changing operations to prevent accumulation across schema boundaries
-    // Schema-changing operations include: Aggregation, Eval, Project (with computed expressions),
-    // Window, StreamWindow, etc.
-    if (plan instanceof Aggregation
-        || plan instanceof Eval
-        || plan instanceof Window
-        || plan instanceof StreamWindow) {
-      return count; // Don't recurse into children beyond schema changes
-    }
-
-    // Recursively count filtering operations in children
-    if (plan.getChild() != null) {
-      for (Node child : plan.getChild()) {
-        if (child instanceof UnresolvedPlan) {
-          count += countFilteringOperations((UnresolvedPlan) child);
-        }
-      }
-    }
-
-    return count;
-  }
-
-  /**
-   * Checks if an expression contains any function calls. Filter expressions with function calls can
-   * cause type mismatches when accumulated and flushed later, so we exclude them from filter
-   * accumulation.
-   */
-  private boolean containsFunctionCall(UnresolvedExpression expr) {
-    if (expr == null) {
-      return false;
-    }
-
-    if (expr instanceof org.opensearch.sql.ast.expression.Function) {
-      return true;
-    }
-
-    // Check children recursively
-    for (Node child : expr.getChild()) {
-      if (child instanceof UnresolvedExpression
-          && containsFunctionCall((UnresolvedExpression) child)) {
-        return true;
-      }
-    }
-
-    return false;
-  }
 }
diff --git a/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/FilterMergeVisitor.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttleImpl;
+import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+
+/**
+ * A RelNode shuttle that collapses each run of consecutive LogicalFilter nodes into a single
+ * filter with combined AND conditions. This prevents deep Filter RelNode chains that cause
+ * memory exhaustion (OOM) with multiple filter operations.
+ *
+ * <p>Example transformation (combined condition shown before simplification):
+ *
+ * <pre>
+ * BEFORE:
+ *   LogicalFilter(age > 30)
+ *     LogicalFilter(age < 40)
+ *       LogicalFilter(balance > 10000)
+ *         TableScan
+ *
+ * AFTER:
+ *   LogicalFilter(AND(balance > 10000, age < 40, age > 30))
+ *     TableScan
+ * </pre>
+ *
+ * <p>Conditions are combined innermost-first, the order in which the chain applied them. A
+ * filter that declares correlation variables is never merged, because the merged node is
+ * created without a variables set and would lose them.
+ */
+public class FilterMergeVisitor extends RelShuttleImpl {
+
+  /**
+   * Merges {@code filter} with the run of mergeable LogicalFilter nodes directly beneath it.
+   *
+   * @param filter the LogicalFilter node to visit
+   * @return the merged filter, or the default traversal result when nothing can be merged
+   */
+  @Override
+  public RelNode visit(LogicalFilter filter) {
+    // Walk the chain with a loop instead of recursion so its length adds no stack depth and
+    // the combined condition is built and simplified once per run.
+    List<RexNode> conditions = new ArrayList<>();
+    RelNode current = filter;
+    while (isMergeable(current)) {
+      LogicalFilter link = (LogicalFilter) current;
+      conditions.add(0, link.getCondition()); // prepend: deeper filters were applied earlier
+      current = link.getInput();
+    }
+
+    if (conditions.size() < 2) {
+      // A lone or correlated filter: keep the node and let the default shuttle visit its input.
+      return super.visit(filter);
+    }
+
+    // Rewrite whatever sits below the run before attaching the merged filter to it.
+    RelNode newInput = current.accept(this);
+    RelOptCluster cluster = filter.getCluster();
+    RexBuilder rexBuilder = cluster.getRexBuilder();
+
+    // Combine all conditions with AND
+    RexNode combinedCondition =
+        rexBuilder.makeCall(org.apache.calcite.sql.fun.SqlStdOperatorTable.AND, conditions);
+
+    // Simplify the combined condition (e.g., remove redundant TRUE, optimize)
+    combinedCondition = org.apache.calcite.rex.RexUtil.simplify(rexBuilder, combinedCondition);
+    return LogicalFilter.create(newInput, combinedCondition);
+  }
+
+  /** Returns true for a LogicalFilter that declares no correlation variables. */
+  private static boolean isMergeable(RelNode node) {
+    return node instanceof LogicalFilter && node.getVariablesSet().isEmpty();
+  }
+}