Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5ce6849
MvCombine Command Feature
Jan 6, 2026
33c9220
MvCombine Command Feature
Jan 6, 2026
3ee160b
Add doctests to MvCombine
Jan 7, 2026
5bc0d98
spotlesscheck apply
Jan 7, 2026
b49b0e7
Merge branch 'opensearch-project:main' into feature/mvcombine
srikanthpadakanti Jan 7, 2026
a5913af
spotlesscheck apply
Jan 7, 2026
5ac0f3d
spotlesscheck apply
Jan 7, 2026
dc4da50
Merge branch 'opensearch-project:main' into feature/mvcombine
srikanthpadakanti Jan 8, 2026
e4150e4
spotlessapply
Jan 8, 2026
dba85da
Address coderrabbit comments
Jan 8, 2026
6750593
Address coderrabbit comments
Jan 8, 2026
b2a9d9f
Address coderrabbit comments
Jan 8, 2026
6510d2b
Address coderrabbit comments
Jan 8, 2026
a93660b
Address coderrabbit comments
Jan 8, 2026
776fcad
Address coderrabbit comments
Jan 8, 2026
9a83103
Add mvcombine to index.md
Jan 8, 2026
c72c45b
Merge branch 'opensearch-project:main' into feature/mvcombine
srikanthpadakanti Jan 9, 2026
26ba22f
Remove the nomv related implementation as that command is still not y…
Jan 9, 2026
469b048
Remove the nomv related implementation as that command is still not y…
Jan 9, 2026
1259270
Remove the nomv related implementation as that command is still not y…
Jan 9, 2026
11fb3e0
Remove the nomv related implementation as that command is still not y…
Jan 9, 2026
22f542c
Merge branch 'opensearch-project:main' into feature/mvcombine
srikanthpadakanti Jan 14, 2026
2771d0b
complete the checklist from ppl-commands.md
Jan 14, 2026
6dfbf00
Merge branch 'main' into feature/mvcombine
srikanthpadakanti Jan 16, 2026
612eaa7
spotlessApply
Jan 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import org.opensearch.sql.ast.tree.Lookup;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Multisearch;
import org.opensearch.sql.ast.tree.MvCombine;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
Expand Down Expand Up @@ -534,6 +535,11 @@ public LogicalPlan visitAddColTotals(AddColTotals node, AnalysisContext context)
throw getOnlyForCalciteException("addcoltotals");
}

/**
 * Rejects the {@code mvcombine} command on this analysis path.
 *
 * <p>No logical plan is produced: the method unconditionally throws the exception built by
 * {@code getOnlyForCalciteException} for the command name {@code "mvcombine"}.
 *
 * @param node mvcombine AST node (not inspected)
 * @param context analysis context (not inspected)
 * @return never returns normally
 */
@Override
public LogicalPlan visitMvCombine(MvCombine node, AnalysisContext context) {
  final String commandName = "mvcombine";
  throw getOnlyForCalciteException(commandName);
}

/** Build {@link ParseExpression} to context and skip to child nodes. */
@Override
public LogicalPlan visitParse(Parse node, AnalysisContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import org.opensearch.sql.ast.tree.Lookup;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Multisearch;
import org.opensearch.sql.ast.tree.MvCombine;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
Expand Down Expand Up @@ -461,4 +462,8 @@ public T visitAddTotals(AddTotals node, C context) {
public T visitAddColTotals(AddColTotals node, C context) {
return visitChildren(node, context);
}

/**
 * Visits an {@link MvCombine} node.
 *
 * <p>The default implementation does nothing specific to mvcombine and simply hands the node to
 * {@code visitChildren}.
 *
 * @param node mvcombine node being visited
 * @param context visitor context passed through unchanged
 * @return whatever {@code visitChildren} returns for this node
 */
public T visitMvCombine(MvCombine node, C context) {
  final T childrenResult = visitChildren(node, context);
  return childrenResult;
}
}
9 changes: 9 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.MinSpanBin;
import org.opensearch.sql.ast.tree.MvCombine;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
Expand Down Expand Up @@ -468,6 +469,14 @@ public static List<Argument> defaultDedupArgs() {
argument("consecutive", booleanLiteral(false)));
}

/**
 * Builds an {@link MvCombine} node for {@code field} without an explicit delimiter.
 *
 * <p>A {@code null} delimiter is passed on, which the {@link MvCombine} constructor replaces with
 * its default (a single space).
 *
 * @param field target field of the mvcombine command
 * @return a new, unattached {@link MvCombine} node
 */
public static MvCombine mvcombine(Field field) {
  return mvcombine(field, null);
}

/**
 * Builds an {@link MvCombine} node for {@code field} with the given delimiter.
 *
 * @param field target field of the mvcombine command
 * @param delim delimiter handed to the {@link MvCombine} constructor; may be {@code null}, in
 *     which case the constructor substitutes a single space
 * @return a new, unattached {@link MvCombine} node
 */
public static MvCombine mvcombine(Field field, String delim) {
  final MvCombine node = new MvCombine(field, delim);
  return node;
}

public static List<Argument> sortOptions() {
return exprList(argument("desc", booleanLiteral(false)));
}
Expand Down
45 changes: 45 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/MvCombine.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import com.google.common.collect.ImmutableList;
import java.util.List;
import javax.annotation.Nullable;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.Field;

/**
 * AST node for the PPL {@code mvcombine} command.
 *
 * <p>Holds the target {@link Field}, the delimiter option, and (once attached) the single child
 * plan the command operates on. The node is mutable only through {@link #attach(UnresolvedPlan)}.
 */
@Getter
@ToString(callSuper = true)
@EqualsAndHashCode(callSuper = false)
public class MvCombine extends UnresolvedPlan {

  /** Delimiter used when the caller does not supply one. */
  private static final String DEFAULT_DELIM = " ";

  /** Field whose values are combined. */
  private final Field field;

  /** Delimiter option; never {@code null} (falls back to {@link #DEFAULT_DELIM}). */
  private final String delim;

  /** Child plan; {@code null} until {@link #attach(UnresolvedPlan)} is called. */
  @Nullable private UnresolvedPlan child;

  /**
   * Creates an unattached mvcombine node.
   *
   * @param field target field of the command
   * @param delim delimiter option, or {@code null} to use a single space
   */
  public MvCombine(Field field, @Nullable String delim) {
    this.field = field;
    this.delim = (delim == null) ? DEFAULT_DELIM : delim;
  }

  /**
   * Sets the child plan of this node.
   *
   * @param child plan that feeds this command
   * @return this node, to allow chaining
   */
  @Override
  public MvCombine attach(UnresolvedPlan child) {
    this.child = child;
    return this;
  }

  /**
   * Returns the child plan as a list.
   *
   * @return an empty list when no child has been attached, otherwise a single-element list
   */
  @Override
  public List<UnresolvedPlan> getChild() {
    return child == null ? ImmutableList.of() : ImmutableList.of(child);
  }

  /** Dispatches to {@link AbstractNodeVisitor#visitMvCombine}. */
  @Override
  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
    return nodeVisitor.visitMvCombine(this, context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFamily;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexCorrelVariable;
import org.apache.calcite.rex.RexInputRef;
Expand Down Expand Up @@ -121,6 +122,7 @@
import org.opensearch.sql.ast.tree.Lookup.OutputStrategy;
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Multisearch;
import org.opensearch.sql.ast.tree.MvCombine;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
Expand Down Expand Up @@ -3086,6 +3088,191 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) {
return context.relBuilder.peek();
}

/**
 * Plans the {@code mvcombine} command: rows that agree on every column except the target are
 * collapsed into one row whose target column holds the combined values as an array.
 *
 * <p>Implementation notes:
 *
 * <ul>
 *   <li>The group key is every input column except the target.
 *   <li>Target values are gathered with {@code COLLECT}; NULL target values are filtered out.
 *   <li>The collected MULTISET is cast to ARRAY and the original column order is restored.
 *   <li>Only the node's field is read here; the node's delimiter is not used by this visitor.
 * </ul>
 *
 * <p>NOTE(review): {@code COLLECT} yields a MULTISET, so the element order of the resulting array
 * may not follow input order; confirm whether mvcombine needs ordered output.
 *
 * @param node mvcombine command to be visited
 * @param context CalcitePlanContext containing the RelBuilder, RexBuilder, and resolution context
 * @return RelNode representing collapsed records with the target combined into a multivalue array
 * @throws SemanticCheckException if the target is not a direct field reference, or is already an
 *     ARRAY/MULTISET column
 */
@Override
public RelNode visitMvCombine(MvCombine node, CalcitePlanContext context) {
  // Plan the child first; everything below reads the input schema from the builder stack.
  visitChildren(node, context);

  final RelBuilder builder = context.relBuilder;
  final RelNode source = builder.peek();
  final List<String> columnNames = source.getRowType().getFieldNames();

  // The target has to resolve to a plain input column.
  final int targetIndex = resolveTargetIndex(node.getField(), context);

  // Every other column becomes part of the group key.
  final List<RexNode> groupKeys =
      buildGroupExpressionsExcludingTarget(targetIndex, columnNames, builder);

  // Aggregate the target with COLLECT under its original column name.
  performCollectAggregation(builder, targetIndex, columnNames.get(targetIndex), groupKeys);

  // Put the columns back in input order and turn the MULTISET into ARRAY<T>.
  restoreColumnOrderWithArrayCast(
      builder, context.rexBuilder, source, columnNames, targetIndex, groupKeys);

  return builder.peek();
}

/**
 * Maps the mvcombine target expression onto an input column index.
 *
 * <p>The target is analyzed through {@code rexVisitor}; only a result that is a {@link
 * RexInputRef} (a direct column reference) is accepted. The column's type is then looked up on
 * the node currently on top of the RelBuilder stack, and ARRAY or MULTISET columns are rejected
 * because mvcombine itself produces ARRAY&lt;T&gt;.
 *
 * @param targetField Target field expression from the AST
 * @param context Planning context
 * @return 0-based input field index for the target
 * @throws SemanticCheckException if the target is not a direct field reference or has an
 *     ARRAY/MULTISET type
 */
private int resolveTargetIndex(Field targetField, CalcitePlanContext context) {
  final RexNode resolved = rexVisitor.analyze(targetField, context);
  if (resolved instanceof RexInputRef) {
    final int columnIndex = ((RexInputRef) resolved).getIndex();

    final RelDataType columnType =
        context.relBuilder.peek().getRowType().getFieldList().get(columnIndex).getType();
    final org.apache.calcite.sql.type.SqlTypeName typeName = columnType.getSqlTypeName();

    final boolean alreadyMultivalue =
        typeName == org.apache.calcite.sql.type.SqlTypeName.ARRAY
            || typeName == org.apache.calcite.sql.type.SqlTypeName.MULTISET;
    if (alreadyMultivalue) {
      throw new SemanticCheckException(
          "mvcombine target cannot be an array/multivalue type, but got: " + columnType);
    }

    return columnIndex;
  }

  throw new SemanticCheckException(
      "mvcombine target must be a direct field reference, but got: " + targetField);
}

/**
 * Collects a field reference for every input column other than the mvcombine target.
 *
 * <p>References are created through {@code relBuilder.field(i)} and returned in input order, so
 * the list has one entry fewer than the input schema.
 *
 * @param targetIndex Input index of the mvcombine target field
 * @param inputFieldNames Input schema field names (only their count is used)
 * @param relBuilder RelBuilder positioned on the input
 * @return Group-by expressions in input order excluding the target
 */
private List<RexNode> buildGroupExpressionsExcludingTarget(
    int targetIndex, List<String> inputFieldNames, RelBuilder relBuilder) {
  final int columnCount = inputFieldNames.size();
  final List<RexNode> keys = new ArrayList<>(Math.max(0, columnCount - 1));
  for (int column = 0; column < columnCount; column++) {
    if (column != targetIndex) {
      keys.add(relBuilder.field(column));
    }
  }
  return keys;
}

/**
* Applies mvcombine aggregation:
*
* <p>GROUP BY all non-target fields, and aggregate target values using {@code COLLECT}. {@code
* COLLECT} produces a MULTISET in Calcite, which we later cast to ARRAY for output.
*
* <p>NULL target values are excluded from the collected multivalue list by applying an aggregate
* filter. This matches typical "combine values" semantics and avoids polluting the result with
* NULL elements.
*
* @param relBuilder RelBuilder positioned on the input
* @param targetIndex Target field input index
* @param targetName Target field output name (preserved)
* @param groupExprs Group-by expressions (all fields except target)
*/
private void performCollectAggregation(
RelBuilder relBuilder, int targetIndex, String targetName, List<RexNode> groupExprs) {

final RexNode targetRef = relBuilder.field(targetIndex);
final RexNode notNullTarget = relBuilder.isNotNull(targetRef);

final RelBuilder.AggCall aggCall =
relBuilder
.aggregateCall(SqlStdOperatorTable.COLLECT, targetRef)
.filter(notNullTarget)
.as(targetName);

relBuilder.aggregate(relBuilder.groupKey(groupExprs), aggCall);
Comment on lines +3205 to +3217
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

Apache Calcite COLLECT aggregate function ordering preservation insertion order multiset

💡 Result:

Short answer: COLLECT in Apache Calcite produces a MULTISET, which by SQL standard is an unordered collection — insertion order is not guaranteed or preserved. Calcite’s COLLECT implementation does not provide a defined ordering and historically there has been work / discussion to replace Collect with ordered-aggregation alternatives (ARRAY_AGG / MULTISET_AGG and ORDER BY / WITHIN GROUP) when ordering matters. [1][2][3][4]

Sources:

  • Calcite SQL reference: MULTISET and collection functions (shows MULTISET/COLLECT semantics). [1]
  • Calcite Collect RelNode Javadoc (Collect produces a MULTISET). [2]
  • JIRA discussion about WITHIN GROUP / ORDER BY for COLLECT and related behavior. [3]
  • JIRA proposing obsoleting Collect in favor of Aggregate/ARRAY_AGG / MULTISET_AGG (notes ordering/use cases). [4]

🏁 Script executed:

# Search for mvcombine documentation and context in the codebase
rg -i "mvcombine" --type java -B 5 -A 5

Repository: opensearch-project/sql

Length of output: 50378


🏁 Script executed:

# Look for ordering-related comments or workarounds in CalciteRelNodeVisitor
rg -i "order|collect" core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java -B 2 -A 2

Repository: opensearch-project/sql

Length of output: 10985


COLLECT aggregate does not preserve input order; verify if mvcombine specification requires order preservation.

The web search confirms that Apache Calcite's COLLECT function produces an unordered MULTISET with no guaranteed insertion order preservation. The current implementation applies COLLECT without an ORDER BY clause, and the subsequent cast to ARRAY does not restore insertion order. If the mvcombine specification requires input stream order to be preserved, the current implementation using COLLECT alone cannot guarantee this behavior. Calcite has documented alternatives like ARRAY_AGG with WITHIN GROUP (ORDER BY) for ordered aggregations. Either confirm that order preservation is not a requirement, or implement an ordered aggregation approach using a synthetic row number or ARRAY_AGG with explicit ordering.

🤖 Prompt for AI Agents
In `@core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java`
around lines 3205 - 3217, performCollectAggregation uses Calcite's unordered
COLLECT via relBuilder.aggregateCall(SqlStdOperatorTable.COLLECT, ...) which
doesn't preserve input order; either confirm mvcombine doesn't require order or
change to an ordered aggregation: replace COLLECT with an ordered
ARRAY_AGG-style aggregation (e.g., ARRAY_AGG with WITHIN GROUP (ORDER BY)) or
synthesize a row-order key (add row_number per input, include it in group and
use ARRAY_AGG ordered by that key) so that relBuilder.aggregate(...) produces a
deterministic ordered array; update performCollectAggregation (and its use of
relBuilder.aggregateCall and subsequent casting) accordingly.

}

/**
 * Re-projects the post-aggregate row back into the input column order and turns the collected
 * target from MULTISET into ARRAY&lt;T&gt;.
 *
 * <p>This method assumes the aggregate output is laid out as:
 *
 * <pre>
 * [groupField0, groupField1, ..., groupFieldN, targetAggMultiset]
 * </pre>
 *
 * <p>Each original column position is mapped as follows: the target slot receives {@code
 * CAST(targetAggMultiset AS ARRAY&lt;T&gt;)}; a column before the target keeps its index; a column
 * after the target is found one position earlier, because the target was removed from the group
 * key.
 *
 * @param relBuilder RelBuilder positioned on the post-aggregate node
 * @param rexBuilder RexBuilder for explicit casts
 * @param input Original input RelNode (used to derive the target element type)
 * @param inputFieldNames Original input field names (also output field names)
 * @param targetIndex Target field index in the original input
 * @param groupExprs Group-by expressions used during aggregation (only their count is used)
 */
private void restoreColumnOrderWithArrayCast(
    RelBuilder relBuilder,
    RexBuilder rexBuilder,
    RelNode input,
    List<String> inputFieldNames,
    int targetIndex,
    List<RexNode> groupExprs) {

  final int columnCount = inputFieldNames.size();

  // The aggregate call sits right after the group columns.
  final int aggregatePos = groupExprs.size();

  // ARRAY<T> where T is the target column's type in the original input.
  final RelDataType elementType = input.getRowType().getFieldList().get(targetIndex).getType();
  final RelDataType arrayType = relBuilder.getTypeFactory().createArrayType(elementType, -1);

  final List<RexNode> outputExprs = new ArrayList<>(columnCount);
  for (int column = 0; column < columnCount; column++) {
    if (column == targetIndex) {
      // COLLECT returns MULTISET; normalize output to ARRAY<T>.
      outputExprs.add(rexBuilder.makeCast(arrayType, relBuilder.field(aggregatePos)));
    } else {
      // Group columns keep input order; those after the target shift left by one.
      final int groupPos = column < targetIndex ? column : column - 1;
      outputExprs.add(relBuilder.field(groupPos));
    }
  }

  final List<String> outputNames = new ArrayList<>(inputFieldNames);

  // Force projection to avoid Calcite "identity" short-circuit when only names/types change.
  relBuilder.project(outputExprs, outputNames, /* force= */ true);
}

@Override
public RelNode visitValues(Values values, CalcitePlanContext context) {
if (values.getValues() == null || values.getValues().isEmpty()) {
Expand Down
1 change: 1 addition & 0 deletions docs/category.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"user/ppl/cmd/sort.md",
"user/ppl/cmd/spath.md",
"user/ppl/cmd/stats.md",
"user/ppl/cmd/mvcombine.md",
"user/ppl/cmd/streamstats.md",
"user/ppl/cmd/subquery.md",
"user/ppl/cmd/syntax.md",
Expand Down
3 changes: 2 additions & 1 deletion docs/user/dql/metadata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Example 1: Show All Indices Information
SQL query::

os> SHOW TABLES LIKE '%'
fetched rows / total rows = 23/23
fetched rows / total rows = 24/24
+----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | TABLE_TYPE | REMARKS | TYPE_CAT | TYPE_SCHEM | TYPE_NAME | SELF_REFERENCING_COL_NAME | REF_GENERATION |
|----------------+-------------+-------------------+------------+---------+----------+------------+-----------+---------------------------+----------------|
Expand All @@ -48,6 +48,7 @@ SQL query::
| docTestCluster | null | events_many_hosts | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | events_null | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | json_test | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | mvcombine_data | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nested | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | nyc_taxi | BASE TABLE | null | null | null | null | null | null |
| docTestCluster | null | occupation | BASE TABLE | null | null | null | null | null | null |
Expand Down
Loading