-
Notifications
You must be signed in to change notification settings - Fork 181
Mvexpand feature #4944
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Mvexpand feature #4944
Changes from all commits
8362fc2
384ba15
7f382f9
44c8124
3cad64e
8e4a2c5
474617d
d502b03
c62defe
a3799b2
d90be9f
da16288
1301e06
beb31de
627ef8f
58facf8
63cdbf7
bdc3aa1
fc8e345
c830356
e9b6f27
fa9436e
ea091d2
4d9b24d
b9d3164
26a59a4
43c806e
a07dff2
7be7473
2c0ea2c
8749289
9508874
08b56ee
3ae2c73
bed2084
5e616ff
709704c
c45fa05
4f3435e
a0b2c8c
47779e1
bf6b924
9aec421
bf87312
c9e2767
2591a6c
125cf3b
00c990f
44814ab
f9dd692
69d6a5a
2464675
0f86c52
32d3867
07509ae
34db739
602358e
b1f2e59
2adbf6f
559165f
f7d942d
6ca94e2
e747edb
587ccb2
600637f
79c9b9d
19c2065
16dbaad
1d0a56e
d3651f8
5efd096
e067b46
514a7dc
9e8ea2c
00f440b
840a454
50031ce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.ast.tree; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import java.util.List; | ||
| import javax.annotation.Nullable; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
| import lombok.ToString; | ||
| import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
| import org.opensearch.sql.ast.expression.Field; | ||
|
|
||
| /** | ||
| * AST node representing the {@code mvexpand} PPL command: {@code mvexpand <field> [limit=N]}. | ||
| * | ||
| * <p>The node carries the {@link Field} to expand and an optional {@code limit}, which may be | ||
| * {@code null}. The input plan is not a constructor argument: it is supplied afterwards through | ||
| * {@code attach(UnresolvedPlan)}, and until then {@code getChild()} returns an empty list. | ||
| * {@code accept} dispatches to {@code AbstractNodeVisitor.visitMvExpand}. | ||
| */ | ||
| @ToString | ||
| @EqualsAndHashCode(callSuper = false) | ||
| public class MvExpand extends UnresolvedPlan { | ||
|
|
||
| private UnresolvedPlan child; /* input plan; remains null until attach() is called */ | ||
| @Getter private final Field field; /* field named in the mvexpand command */ | ||
| @Getter @Nullable private final Integer limit; /* optional limit; may be null */ | ||
|
|
||
| public MvExpand(Field field, @Nullable Integer limit) { | ||
| this.field = field; | ||
| this.limit = limit; | ||
| } | ||
|
|
||
| @Override | ||
| public MvExpand attach(UnresolvedPlan child) { | ||
| this.child = child; | ||
| return this; /* hands back this same node, not the child */ | ||
| } | ||
|
|
||
| @Override | ||
| public List<UnresolvedPlan> getChild() { | ||
| return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); /* empty list when nothing is attached, otherwise a one-element list */ | ||
| } | ||
|
|
||
| @Override | ||
| public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) { | ||
| return nodeVisitor.visitMvExpand(this, context); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,7 @@ | |
| import org.apache.calcite.rel.core.JoinRelType; | ||
| import org.apache.calcite.rel.logical.LogicalValues; | ||
| import org.apache.calcite.rel.type.RelDataType; | ||
| import org.apache.calcite.rel.type.RelDataTypeFactory; | ||
| import org.apache.calcite.rel.type.RelDataTypeFamily; | ||
| import org.apache.calcite.rel.type.RelDataTypeField; | ||
| import org.apache.calcite.rex.RexCall; | ||
|
|
@@ -122,6 +123,7 @@ | |
| import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; | ||
| import org.opensearch.sql.ast.tree.ML; | ||
| import org.opensearch.sql.ast.tree.Multisearch; | ||
| import org.opensearch.sql.ast.tree.MvExpand; | ||
| import org.opensearch.sql.ast.tree.Paginate; | ||
| import org.opensearch.sql.ast.tree.Parse; | ||
| import org.opensearch.sql.ast.tree.Patterns; | ||
|
|
@@ -846,7 +848,11 @@ public RelNode visitPatterns(Patterns node, CalcitePlanContext context) { | |
| .toList(); | ||
| context.relBuilder.aggregate(context.relBuilder.groupKey(groupByList), aggCall); | ||
| buildExpandRelNode( | ||
| context.relBuilder.field(node.getAlias()), node.getAlias(), node.getAlias(), context); | ||
| context.relBuilder.field(node.getAlias()), | ||
| node.getAlias(), | ||
| node.getAlias(), | ||
| null, | ||
| context); | ||
| flattenParsedPattern( | ||
| node.getAlias(), | ||
| context.relBuilder.field(node.getAlias()), | ||
|
|
@@ -3111,11 +3117,82 @@ public RelNode visitExpand(Expand expand, CalcitePlanContext context) { | |
| RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(arrayField, context); | ||
| String alias = expand.getAlias(); | ||
|
|
||
| buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, context); | ||
| buildExpandRelNode(arrayFieldRex, arrayField.getField().toString(), alias, null, context); | ||
|
|
||
| return context.relBuilder.peek(); | ||
| } | ||
|
|
||
| /** | ||
| * MVExpand command visitor. | ||
| * | ||
| * <p>For Calcite remote planning, mvexpand reuses the same expansion mechanics as {@link Expand}: | ||
| * it unnests the target multivalue field and joins back to the original relation. | ||
|
Comment on lines
+3125
to
+3141
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Duplicated Javadoc. |
||
| * mvexpand-specific semantics (such as an optional per-document limit) are carried by the {@link | ||
| * MvExpand} AST node and applied via the limit parameter passed into the shared expansion | ||
| * builder. | ||
| * | ||
| * <p>Missing-field behavior: if the target field does not exist in the input schema, mvexpand | ||
| * produces no rows while keeping the output schema stable. | ||
| * | ||
| * @param mvExpand MVExpand command to be visited | ||
| * @param context CalcitePlanContext containing the RelBuilder and other context | ||
| * @return RelNode representing records with the expanded multi-value field | ||
| */ | ||
| @Override | ||
| public RelNode visitMvExpand(MvExpand mvExpand, CalcitePlanContext context) { | ||
srikanthpadakanti marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| visitChildren(mvExpand, context); | ||
|
|
||
| final RelBuilder relBuilder = context.relBuilder; | ||
| final Field field = mvExpand.getField(); | ||
| final String fieldName = field.getField().toString(); | ||
|
|
||
| // Missing-field: produce no rows (but keep schema stable). | ||
| final RelDataType inputType = relBuilder.peek().getRowType(); | ||
| final RelDataTypeField inputField = | ||
| inputType.getField(fieldName, /*caseSensitive*/ false, /*elideRecord*/ false); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. PPL is case-sensitive for field names. |
||
| if (inputField == null) { | ||
| return buildEmptyResultWithStableSchema(relBuilder, fieldName); | ||
| } | ||
|
Comment on lines
+3165
to
+3167
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we can simplify to throw |
||
|
|
||
| // Resolve field ref using rexVisitor for consistent semantics (same as expand). | ||
| final RexInputRef arrayFieldRex = (RexInputRef) rexVisitor.analyze(field, context); | ||
|
|
||
| // Enforce ARRAY type before UNNEST so we return SemanticCheckException. | ||
| final SqlTypeName actual = arrayFieldRex.getType().getSqlTypeName(); | ||
| if (actual != SqlTypeName.ARRAY) { | ||
| throw new SemanticCheckException( | ||
| String.format( | ||
| "Cannot expand field '%s': expected ARRAY type but found %s", fieldName, actual)); | ||
| } | ||
|
Comment on lines
+3173
to
+3178
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is overly restrictive. |
||
|
|
||
| buildExpandRelNode(arrayFieldRex, fieldName, fieldName, mvExpand.getLimit(), context); | ||
| return relBuilder.peek(); | ||
| } | ||
|
|
||
| private static RelNode buildEmptyResultWithStableSchema(RelBuilder relBuilder, String fieldName) { /* Fallback used by visitMvExpand when fieldName is absent from the input row type: appends a NULL array column named fieldName, filters out every row, and returns the node on top of the builder stack. */ | ||
| final RelDataTypeFactory typeFactory = relBuilder.getTypeFactory(); | ||
| final RelDataType arrayAny = | ||
| typeFactory.createArrayType(typeFactory.createSqlType(SqlTypeName.ANY), -1); /* ARRAY whose element type is ANY; -1 is the maxCardinality argument (documented by Calcite as "unlimited") */ | ||
|
|
||
| relBuilder.projectPlus( | ||
| List.of(relBuilder.alias(relBuilder.getRexBuilder().makeNullLiteral(arrayAny), fieldName))); /* adds the placeholder column, a typed NULL aliased to fieldName, on top of the current fields */ | ||
|
|
||
| relBuilder.filter(relBuilder.literal(false)); /* constant-false predicate, so no row passes */ | ||
| return relBuilder.peek(); | ||
| } | ||
|
|
||
| @Override | ||
| public RelNode visitValues(Values values, CalcitePlanContext context) { | ||
| if (values.getValues() == null || values.getValues().isEmpty()) { | ||
|
|
@@ -3360,7 +3437,11 @@ private void flattenParsedPattern( | |
| } | ||
|
|
||
| private void buildExpandRelNode( | ||
| RexInputRef arrayFieldRex, String arrayFieldName, String alias, CalcitePlanContext context) { | ||
| RexInputRef arrayFieldRex, | ||
| String arrayFieldName, | ||
| String alias, | ||
| @Nullable Integer perDocLimit, | ||
| CalcitePlanContext context) { | ||
| // 3. Capture the outer row in a CorrelationId | ||
| Holder<RexCorrelVariable> correlVariable = Holder.empty(); | ||
| context.relBuilder.variable(correlVariable::set); | ||
|
|
@@ -3375,14 +3456,17 @@ private void buildExpandRelNode( | |
| RelNode leftNode = context.relBuilder.build(); | ||
|
|
||
| // 5. Build join right node and expand the array field using uncollect | ||
| RelNode rightNode = | ||
| context | ||
| .relBuilder | ||
| // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter | ||
| .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) | ||
| .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) | ||
| .uncollect(List.of(), false) | ||
| .build(); | ||
| context | ||
| .relBuilder | ||
| // fake input, see convertUnnest and convertExpression in Calcite SqlToRelConverter | ||
| .push(LogicalValues.createOneRow(context.relBuilder.getCluster())) | ||
| .project(List.of(correlArrayFieldAccess), List.of(arrayFieldName)) | ||
| .uncollect(List.of(), false); | ||
|
|
||
| if (perDocLimit != null) { | ||
| context.relBuilder.limit(0, perDocLimit); | ||
| } | ||
| RelNode rightNode = context.relBuilder.build(); | ||
|
|
||
| // 6. Perform a nested-loop join (correlate) between the original table and the expanded | ||
| // array field. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.