From 65c549f70ec5f5d15a0a372c1ad0be46a6e0cdc2 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Tue, 14 Apr 2026 18:57:55 -0700 Subject: [PATCH 1/3] feat(api): Add datetime UDT extension for unified query API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bridge the type mismatch between PPL UDT types (String-based) and standard Calcite types (int/long-based) in the unified query API path by contributing two post-analysis rules to PPL's LanguageSpec: 1. DatetimeUdtNormalizeRule rewrites UDT calls — replaces UDT return types with standard Calcite types and wraps UDF implementors to convert values at input (int/long -> String) and output (String -> int/long). 2. DatetimeUdtOutputCastRule wraps the plan root with a projection that casts remaining datetime output columns to VARCHAR so the wire format matches PPL's String datetime contract. Both rules are registered via DatetimeUdtExtension, which encapsulates the ordering invariant (normalize before cast). The extension plugs into the LanguageExtension mechanism introduced in #5360 via a new postAnalysisRules hook, applied once at the top of UnifiedQueryPlanner.plan() after the language-specific strategy returns. Applied only on the PPL path; zero impact on the SQL or OpenSearch plugin paths. Signed-off-by: Chen Dai --- .../sql/api/UnifiedQueryPlanner.java | 10 +- .../opensearch/sql/api/spec/LanguageSpec.java | 42 ++++- .../sql/api/spec/UnifiedPplSpec.java | 3 +- .../spec/datetime/DatetimeUdtExtension.java | 92 +++++++++++ .../datetime/DatetimeUdtNormalizeRule.java | 115 ++++++++++++++ .../datetime/DatetimeUdtOutputCastRule.java | 56 +++++++ .../datetime/DatetimeUdtExtensionTest.java | 148 ++++++++++++++++++ .../expression/function/ImplementorUDF.java | 36 +++-- 8 files changed, 482 insertions(+), 20 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtOutputCastRule.java create mode 100644 api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtensionTest.java diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index edf9ae50e18..4ab47f36bb3 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -60,7 +60,15 @@ public UnifiedQueryPlanner(UnifiedQueryContext context) { */ public RelNode plan(String query) { try { - return context.measure(ANALYZE, () -> strategy.plan(query)); + return context.measure( + ANALYZE, + () -> { + RelNode plan = strategy.plan(query); + for (var rule : context.getLangSpec().postAnalysisRules()) { + plan = rule.apply(plan); + } + return plan; + }); } catch (SyntaxCheckException | UnsupportedOperationException e) { throw e; } catch (Exception e) { diff --git a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java index 89167dc27a5..49f905f50a1 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java @@ -7,6 +7,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -17,8 +18,8 @@ /** * Language specification defining the dialect the engine accepts. Provides parser configuration, - * validator configuration, and composable {@link LanguageExtension}s that contribute operators and - * post-parse rewrite rules. + * validator configuration, and composable {@link LanguageExtension}s that contribute operators, + * post-parse rewrite rules, and post-analysis rewrite rules. * *

Implementations define a complete language surface — for example, {@link UnifiedSqlSpec} * provides ANSI and extended SQL modes. A future PPL spec would implement this same interface once @@ -27,8 +28,18 @@ public interface LanguageSpec { /** - * A composable language extension that contributes operators and post-parse rewrite rules. All - * methods have defaults so extensions only override what they need. + * A RelNode rewrite rule applied after analysis and before execution. Takes a logical plan and + * returns a rewritten plan. + */ + @FunctionalInterface + interface PostAnalysisRule { + RelNode apply(RelNode plan); + } + + /** + * A composable language extension that contributes operators, post-parse rewrite rules, and + * post-analysis rewrite rules. All methods have defaults so extensions only override what they + * need. */ interface LanguageExtension { @@ -47,6 +58,15 @@ default SqlOperatorTable operators() { default List> postParseRules() { return List.of(); } + + /** + * RelNode rewrite rules applied after analysis and before execution. Rules within a single + * extension are applied in list order; extensions that depend on ordering should return their + * rules together from one extension rather than relying on cross-extension ordering. + */ + default List postAnalysisRules() { + return List.of(); + } } /** @@ -62,9 +82,9 @@ default List> postParseRules() { SqlValidator.Config validatorConfig(); /** - * Language extensions registered with this spec. Each extension contributes operators and - * post-parse rewrite rules that are composed by {@link #operatorTable()} and {@link - * #postParseRules()}. + * Language extensions registered with this spec. Each extension contributes operators, post-parse + * rewrite rules, and post-analysis rewrite rules composed by {@link #operatorTable()}, {@link + * #postParseRules()}, and {@link #postAnalysisRules()}. */ List extensions(); @@ -86,4 +106,12 @@ default SqlOperatorTable operatorTable() { default List> postParseRules() { return extensions().stream().flatMap(ext -> ext.postParseRules().stream()).toList(); } + + /** + * All post-analysis RelNode rewrite rules from registered extensions, flattened in registration + * order. Applied to the logical plan after analysis and before execution. + */ + default List postAnalysisRules() { + return extensions().stream().flatMap(ext -> ext.postAnalysisRules().stream()).toList(); + } } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java index 763f6ded540..781f75bf0bd 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedPplSpec.java @@ -10,6 +10,7 @@ import lombok.NoArgsConstructor; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.validate.SqlValidator; +import org.opensearch.sql.api.spec.datetime.DatetimeUdtExtension; /** * PPL language specification. @@ -37,6 +38,6 @@ public SqlValidator.Config validatorConfig() { @Override public List extensions() { - return List.of(); + return List.of(new DatetimeUdtExtension()); } } diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java new file mode 100644 index 00000000000..cb7026a2c4b --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java @@ -0,0 +1,92 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import java.util.List; +import java.util.Optional; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.avatica.util.DateTimeUtils; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.api.spec.LanguageSpec.LanguageExtension; +import org.opensearch.sql.api.spec.LanguageSpec.PostAnalysisRule; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +/** + * Normalizes datetime UDT operations in the logical plan and casts remaining datetime output + * columns to VARCHAR so the wire output matches PPL's String datetime contract. + * + *

Contributes two ordered post-analysis rules: {@link DatetimeUdtNormalizeRule} rewrites UDT + * calls; {@link DatetimeUdtOutputCastRule} wraps the root with a varchar projection. The cast + * depends on the normalized row type, so both rules live in a single extension to keep their + * ordering encapsulated. + */ +public class DatetimeUdtExtension implements LanguageExtension { + + @Override + public List postAnalysisRules() { + return List.of(new DatetimeUdtNormalizeRule(), new DatetimeUdtOutputCastRule()); + } + + /** Maps a datetime UDT to its standard Calcite equivalent with value conversion methods. */ + @Getter + @RequiredArgsConstructor + enum UdtMapping { + DATE(ExprUDT.EXPR_DATE, SqlTypeName.DATE, "dateStringToUnixDate", "unixDateToString"), + TIME(ExprUDT.EXPR_TIME, SqlTypeName.TIME, "timeStringToUnixDate", "unixTimeToString"), + TIMESTAMP( + ExprUDT.EXPR_TIMESTAMP, + SqlTypeName.TIMESTAMP, + "timestampStringToUnixDate", + "unixTimestampToString"); + + private final ExprUDT udtType; + private final SqlTypeName stdType; + private final String toStdMethod; + private final String fromStdMethod; + + /** Matches a UDT type to its mapping. */ + static Optional fromUdtType(RelDataType type) { + if (!(type instanceof AbstractExprRelDataType e)) return Optional.empty(); + ExprUDT udt = e.getUdt(); + for (UdtMapping u : values()) { + if (u.udtType == udt) return Optional.of(u); + } + return Optional.empty(); + } + + /** Matches a standard Calcite type to its mapping. */ + static Optional fromStdType(RelDataType type) { + SqlTypeName name = type.getSqlTypeName(); + for (UdtMapping u : values()) { + if (u.stdType == name) return Optional.of(u); + } + return Optional.empty(); + } + + RelDataType toStdType(RexBuilder rexBuilder, boolean nullable) { + return rexBuilder + .getTypeFactory() + .createTypeWithNullability(rexBuilder.getTypeFactory().createSqlType(stdType), nullable); + } + + /** UDT value (String) → standard value (int/long). */ + Expression toStdValue(Expression result) { + return Expressions.call( + DateTimeUtils.class, toStdMethod, Expressions.call(result, "toString")); + } + + /** Standard value (int/long) → UDT value (String). */ + Expression fromStdValue(Expression operand) { + return Expressions.call(DateTimeUtils.class, fromStdMethod, operand); + } + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java new file mode 100644 index 00000000000..e0a7358356a --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtNormalizeRule.java @@ -0,0 +1,115 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; +import org.opensearch.sql.api.spec.LanguageSpec.PostAnalysisRule; +import org.opensearch.sql.api.spec.datetime.DatetimeUdtExtension.UdtMapping; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.ImplementorUDF.ImplementableUDFunction; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * Normalizes UDT types in the plan by replacing UDT return types with standard Calcite types + * (signature) and wrapping UDF implementors to convert between UDT and standard values + * (implementation). + */ +public class DatetimeUdtNormalizeRule implements PostAnalysisRule { + + @Override + public RelNode apply(RelNode plan) { + RexBuilder rexBuilder = plan.getCluster().getRexBuilder(); + return plan.accept( + new RelHomogeneousShuttle() { + @Override + public RelNode visit(RelNode other) { + return super.visit(other).accept(new UdtRexShuttle(rexBuilder)); + } + }); + } + + private static class UdtRexShuttle extends RexShuttle { + + private final RexBuilder rexBuilder; + + UdtRexShuttle(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + } + + @Override + public RexNode visitCall(RexCall call) { + RexCall visited = (RexCall) super.visitCall(call); + + if (!(visited.getOperator() instanceof SqlUserDefinedFunction udf + && udf.getFunction() instanceof ImplementableUDFunction func)) { + return visited; + } + + Optional returnType = UdtMapping.fromUdtType(visited.getType()); + if (returnType.isEmpty() + && visited.getOperands().stream() + .noneMatch(op -> UdtMapping.fromStdType(op.getType()).isPresent())) { + return visited; + } + + RelDataType normReturnType = normalizeReturnType(rexBuilder, visited, returnType); + NotNullImplementor normFuncImpl = normalizeImplementation(visited, func, returnType); + SqlUserDefinedFunction normUdf = + new ImplementorUDF(normFuncImpl, func.getNullPolicy()) { + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return returnType + .map(u -> ReturnTypes.explicit(normReturnType)) + .orElse(udf.getReturnTypeInference()); + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return (UDFOperandMetadata) udf.getOperandTypeChecker(); + } + }.toUDF(udf.getName(), udf.isDeterministic()); + return rexBuilder.makeCall(normReturnType, normUdf, visited.getOperands()); + } + } + + /** Replace UDT return type with standard Calcite type. */ + private static RelDataType normalizeReturnType( + RexBuilder rexBuilder, RexCall call, Optional returnUdt) { + return returnUdt + .map(u -> u.toStdType(rexBuilder, call.getType().isNullable())) + .orElse(call.getType()); + } + + /** Wrap implementor to convert inputs (standard → UDT) and output (UDT → standard). */ + private static NotNullImplementor normalizeImplementation( + RexCall originalCall, ImplementableUDFunction func, Optional returnUdt) { + return (translator, rexCall, operands) -> { + List converted = new ArrayList<>(operands.size()); + for (int i = 0; i < operands.size(); i++) { + Expression operand = operands.get(i); + RelDataType opType = originalCall.getOperands().get(i).getType(); + converted.add( + UdtMapping.fromStdType(opType).map(u -> u.fromStdValue(operand)).orElse(operand)); + } + Expression result = func.getNotNullImplementor().implement(translator, rexCall, converted); + return returnUdt.map(u -> u.toStdValue(result)).orElse(result); + }; + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtOutputCastRule.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtOutputCastRule.java new file mode 100644 index 00000000000..c1e012b8543 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtOutputCastRule.java @@ -0,0 +1,56 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.api.spec.LanguageSpec.PostAnalysisRule; +import org.opensearch.sql.api.spec.datetime.DatetimeUdtExtension.UdtMapping; + +/** + * Wraps the plan with a projection that casts standard datetime output columns to VARCHAR so the + * wire output matches PPL's String datetime contract. Runs after {@link DatetimeUdtNormalizeRule} + * has converted UDT columns to their standard datetime types. + */ +public class DatetimeUdtOutputCastRule implements PostAnalysisRule { + + @Override + public RelNode apply(RelNode plan) { + List fields = plan.getRowType().getFieldList(); + boolean anyDatetime = + fields.stream().anyMatch(f -> UdtMapping.fromStdType(f.getType()).isPresent()); + if (!anyDatetime) { + return plan; + } + + RexBuilder rexBuilder = plan.getCluster().getRexBuilder(); + RelDataType varcharType = rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + List projects = new ArrayList<>(fields.size()); + List names = new ArrayList<>(fields.size()); + for (RelDataTypeField field : fields) { + RexNode ref = rexBuilder.makeInputRef(plan, field.getIndex()); + if (UdtMapping.fromStdType(field.getType()).isPresent()) { + RelDataType nullableVarchar = + rexBuilder + .getTypeFactory() + .createTypeWithNullability(varcharType, field.getType().isNullable()); + projects.add(rexBuilder.makeCast(nullableVarchar, ref)); + } else { + projects.add(ref); + } + names.add(field.getName()); + } + return LogicalProject.create(plan, List.of(), projects, names, Set.of()); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtensionTest.java b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtensionTest.java new file mode 100644 index 00000000000..41ef6c07397 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtensionTest.java @@ -0,0 +1,148 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import static java.sql.Types.BIGINT; +import static java.sql.Types.INTEGER; +import static java.sql.Types.VARCHAR; +import static org.junit.Assert.assertFalse; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.time.LocalDate; +import java.time.LocalTime; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.schema.Table; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.ResultSetAssertion; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; +import org.opensearch.sql.api.spec.datetime.DatetimeUdtExtension.UdtMapping; + +public class DatetimeUdtExtensionTest extends UnifiedQueryTestBase implements ResultSetAssertion { + + private UnifiedQueryCompiler compiler; + + @Before + public void setUp() { + super.setUp(); + compiler = new UnifiedQueryCompiler(context); + } + + @Override + protected Table createEmployeesTable() { + return SimpleTable.builder() + .col("name", SqlTypeName.VARCHAR) + .col("hire_date", SqlTypeName.DATE) + .col("login_time", SqlTypeName.TIME) + .col("updated_at", SqlTypeName.TIMESTAMP) + .row( + new Object[] { + "Alice", + (int) LocalDate.of(2020, 3, 15).toEpochDay(), + (int) (LocalTime.of(9, 30).toNanoOfDay() / 1_000_000), + 1705312200000L + }) // 2024-01-15 10:30:00 UTC + .build(); + } + + private void assertNoUdtInRexCalls(RelNode plan) { + plan.accept( + new RelHomogeneousShuttle() { + @Override + public RelNode visit(RelNode other) { + other.accept( + new RexShuttle() { + @Override + public RexNode visitCall(RexCall call) { + assertFalse( + "RexCall " + call + " has UDT return type: " + call.getType(), + UdtMapping.fromUdtType(call.getType()).isPresent()); + return super.visitCall(call); + } + }); + return super.visit(other); + } + }); + } + + private ResultSet planAndExecute(String query) throws Exception { + RelNode plan = planner.plan(query); + assertNoUdtInRexCalls(plan); + PreparedStatement stmt = compiler.compile(plan); + return stmt.executeQuery(); + } + + @Test + public void testDateUdtNormalization() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees" + + " | eval last = LAST_DAY(hire_date), y = YEAR(hire_date)" + + " | fields last, y"); + verify(rs) + .expectSchema(col("last", VARCHAR), col("y", INTEGER)) + .expectData(row("2020-03-31", 2020)); + } + + @Test + public void testTimeUdtNormalization() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees | where login_time > MAKETIME(8, 0, 0) | fields" + + " login_time"); + verify(rs).expectSchema(col("login_time", VARCHAR)).expectData(row("09:30:00")); + } + + @Test + public void testTimestampUdtNormalization() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees" + + " | where updated_at > TIMESTAMP('2024-01-01 00:00:00')" + + " | eval fmt = DATE_FORMAT(updated_at, '%Y-%m')" + + " | fields fmt"); + verify(rs).expectSchema(col("fmt", VARCHAR)).expectData(row("2024-01")); + } + + @Test + public void testNestedUdfCalls() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees" + + " | eval days = DATEDIFF(DATE('2025-01-01'), DATE(hire_date))" + + " | fields days"); + verify(rs).expectSchema(col("days", BIGINT)).expectData(row(1753L)); + } + + @Test + public void testUnrelatedUdfUnaffected() throws Exception { + ResultSet rs = + planAndExecute("source = catalog.employees | eval s = CONCAT(name, ' test') | fields s"); + verify(rs).expectSchema(col("s", VARCHAR)).expectData(row("Alice test")); + } + + @Test + public void testBareDatetimeColumnPassthrough() throws Exception { + // DATE and TIME are TZ-independent; TIMESTAMP formatting depends on JVM TZ + ResultSet rs = planAndExecute("source = catalog.employees | fields hire_date, login_time"); + verify(rs) + .expectSchema(col("hire_date", VARCHAR), col("login_time", VARCHAR)) + .expectData(row("2020-03-15", "09:30:00")); + } + + @Test + public void testStandardCalciteFunctionUnaffected() throws Exception { + ResultSet rs = planAndExecute("source = catalog.employees | eval u = UPPER(name) | fields u"); + verify(rs).expectSchema(col("u", VARCHAR)).expectData(row("ALICE")); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/ImplementorUDF.java b/core/src/main/java/org/opensearch/sql/expression/function/ImplementorUDF.java index 248deb8bd02..ffc9283aa7f 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/ImplementorUDF.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/ImplementorUDF.java @@ -6,6 +6,8 @@ package org.opensearch.sql.expression.function; import java.util.List; +import lombok.Getter; +import lombok.RequiredArgsConstructor; import org.apache.calcite.adapter.enumerable.CallImplementor; import org.apache.calcite.adapter.enumerable.NotNullImplementor; import org.apache.calcite.adapter.enumerable.NullPolicy; @@ -25,16 +27,28 @@ protected ImplementorUDF(NotNullImplementor implementor, NullPolicy nullPolicy) @Override public ImplementableFunction getFunction() { - return new ImplementableFunction() { - @Override - public List getParameters() { - return List.of(); - } - - @Override - public CallImplementor getImplementor() { - return RexImpTable.createImplementor(implementor, nullPolicy, false); - } - }; + return new ImplementableUDFunction(implementor, nullPolicy); + } + + /** + * Named ImplementableFunction that exposes the NotNullImplementor and NullPolicy. This allows + * rewriters (e.g., DatetimeUdtRewriter) to access the original implementor for wrapping without + * reflection. + */ + @Getter + @RequiredArgsConstructor + public static class ImplementableUDFunction implements ImplementableFunction { + private final NotNullImplementor notNullImplementor; + private final NullPolicy nullPolicy; + + @Override + public List getParameters() { + return List.of(); + } + + @Override + public CallImplementor getImplementor() { + return RexImpTable.createImplementor(notNullImplementor, nullPolicy, false); + } } } From 7b267574b416b134e731d0bf9e3c2cddd3c44762 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 29 Apr 2026 16:13:20 -0700 Subject: [PATCH 2/3] feat(api): Coerce VARCHAR operands to datetime in non-UDF calls Add DatetimeUdtLiteralCoercionRule as a post-analysis rule that wraps VARCHAR operands with CAST(... AS ) inside comparisons, IN, SEARCH, BETWEEN, and COALESCE when the call has a standard Calcite DATE/TIME/TIMESTAMP operand alongside. This closes the gap left by DatetimeUdtNormalizeRule, which only rewrites operators backed by ImplementableUDFunction. The rule only modifies operand subtrees inside RexCall nodes; no RelNode rowType or RexInputRef slot identity is altered, so Calcite's cached RexInputRef types cannot be invalidated (unlike an in-place ref rewrite). Registered first in the extension's rule list so normalize and output-cast see homogeneous types downstream. Known scope limits: IN and BETWEEN with cross-type value lists are rejected by CalciteRexNodeVisitor before any post-analysis rule can run, and datetime+interval arithmetic requires a separate function signature registration; both are out of scope for this rule. Signed-off-by: Chen Dai --- .../spec/datetime/DatetimeUdtExtension.java | 15 +- .../DatetimeUdtLiteralCoercionRule.java | 121 +++++++++++++++ .../DatetimeUdtLiteralCoercionRuleTest.java | 143 ++++++++++++++++++ 3 files changed, 274 insertions(+), 5 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRule.java create mode 100644 api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java index cb7026a2c4b..e7ca07d82e3 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtExtension.java @@ -24,16 +24,21 @@ * Normalizes datetime UDT operations in the logical plan and casts remaining datetime output * columns to VARCHAR so the wire output matches PPL's String datetime contract. * - *

Contributes two ordered post-analysis rules: {@link DatetimeUdtNormalizeRule} rewrites UDT - * calls; {@link DatetimeUdtOutputCastRule} wraps the root with a varchar projection. The cast - * depends on the normalized row type, so both rules live in a single extension to keep their - * ordering encapsulated. + *

Contributes three ordered post-analysis rules: {@link DatetimeUdtLiteralCoercionRule} casts + * VARCHAR operands that appear alongside standard datetime operands in non-UDF operators (so the + * subsequent rules see homogeneous types); {@link DatetimeUdtNormalizeRule} rewrites UDT calls; + * {@link DatetimeUdtOutputCastRule} wraps the root with a varchar projection. The cast depends on + * the normalized row type, so all three rules live in a single extension to keep their ordering + * encapsulated. */ public class DatetimeUdtExtension implements LanguageExtension { @Override public List postAnalysisRules() { - return List.of(new DatetimeUdtNormalizeRule(), new DatetimeUdtOutputCastRule()); + return List.of( + new DatetimeUdtLiteralCoercionRule(), + new DatetimeUdtNormalizeRule(), + new DatetimeUdtOutputCastRule()); } /** Maps a datetime UDT to its standard Calcite equivalent with value conversion methods. */ diff --git a/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRule.java b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRule.java new file mode 100644 index 00000000000..6e5957528ac --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRule.java @@ -0,0 +1,121 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import org.apache.calcite.rel.RelHomogeneousShuttle; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.api.spec.LanguageSpec.PostAnalysisRule; +import org.opensearch.sql.api.spec.datetime.DatetimeUdtExtension.UdtMapping; + +/** + * Coerces VARCHAR literals and references that appear alongside standard datetime operands inside + * non-UDF operators (comparisons, IN, BETWEEN/SEARCH, COALESCE) by wrapping the VARCHAR side in + * {@code CAST(... AS DATE|TIME|TIMESTAMP)}. This closes the gap left by {@link + * DatetimeUdtNormalizeRule}, which only rewrites operators backed by {@code + * ImplementableUDFunction}. + * + *

Only operand sub-trees inside {@code RexCall} nodes are modified; no {@code RelNode} row type + * is changed and no {@code RexInputRef} slot identity is altered. This keeps the rewrite safe + * against Calcite's cached {@code RexInputRef} types (unlike an in-place ref rewrite that would + * invalidate parent nodes). + */ +public class DatetimeUdtLiteralCoercionRule implements PostAnalysisRule { + + @Override + public RelNode apply(RelNode plan) { + RexBuilder rexBuilder = plan.getCluster().getRexBuilder(); + return plan.accept( + new RelHomogeneousShuttle() { + @Override + public RelNode visit(RelNode other) { + return super.visit(other).accept(new LiteralCoercionShuttle(rexBuilder)); + } + }); + } + + private static class LiteralCoercionShuttle extends RexShuttle { + + private final RexBuilder rexBuilder; + + LiteralCoercionShuttle(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + } + + @Override + public RexNode visitCall(RexCall call) { + RexCall visited = (RexCall) super.visitCall(call); + if (!isTargetOperator(visited)) { + return visited; + } + Optional datetime = findDatetimeOperand(visited); + if (datetime.isEmpty()) { + return visited; + } + List coerced = coerceVarcharOperands(visited.getOperands(), datetime.get()); + if (coerced.equals(visited.getOperands())) { + return visited; + } + return visited.clone(visited.getType(), coerced); + } + + /** Operators where we perform VARCHAR ↔ datetime operand coercion. */ + private static boolean isTargetOperator(RexCall call) { + SqlKind kind = call.getKind(); + return kind == SqlKind.EQUALS + || kind == SqlKind.NOT_EQUALS + || kind == SqlKind.GREATER_THAN + || kind == SqlKind.GREATER_THAN_OR_EQUAL + || kind == SqlKind.LESS_THAN + || kind == SqlKind.LESS_THAN_OR_EQUAL + || kind == SqlKind.IN + || kind == SqlKind.SEARCH + || kind == SqlKind.BETWEEN + || kind == SqlKind.COALESCE; + } + + /** Returns the first operand whose type is a standard Calcite datetime. */ + private static Optional findDatetimeOperand(RexCall call) { + for (RexNode op : call.getOperands()) { + Optional m = UdtMapping.fromStdType(op.getType()); + if (m.isPresent()) { + return m; + } + } + return Optional.empty(); + } + + /** Wraps every VARCHAR/CHAR operand in {@code CAST(... AS )}. */ + private List coerceVarcharOperands(List operands, UdtMapping datetime) { + List coerced = new ArrayList<>(operands.size()); + boolean changed = false; + for (RexNode op : operands) { + if (isCharType(op.getType())) { + RelDataType target = datetime.toStdType(rexBuilder, op.getType().isNullable()); + coerced.add(rexBuilder.makeCast(target, op)); + changed = true; + } else { + coerced.add(op); + } + } + return changed ? coerced : operands; + } + + private static boolean isCharType(RelDataType type) { + SqlTypeName name = type.getSqlTypeName(); + return name == SqlTypeName.VARCHAR || name == SqlTypeName.CHAR; + } + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java new file mode 100644 index 00000000000..5e991251a16 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java @@ -0,0 +1,143 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec.datetime; + +import static java.sql.Types.VARCHAR; + +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.time.LocalDate; +import java.time.LocalTime; +import org.apache.calcite.schema.Table; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.Before; +import org.junit.Test; +import org.opensearch.sql.api.ResultSetAssertion; +import org.opensearch.sql.api.UnifiedQueryTestBase; +import org.opensearch.sql.api.compiler.UnifiedQueryCompiler; + +/** + * Tests for {@link DatetimeUdtLiteralCoercionRule}. Exercises the non-UDF constructs where a + * standard Calcite DATE/TIME/TIMESTAMP operand meets a VARCHAR literal: comparisons, conditional + * expressions ({@code case()}, {@code if()}), and {@code fillnull}. Without the rule these queries + * fail at runtime (Janino cannot compile {@code int > String}) or at Calcite code generation + * (COALESCE type mismatch). + * + *

Note: {@code IN} and {@code BETWEEN} are intentionally NOT covered here. The PPL AST→Rex + * visitor ({@code CalciteRexNodeVisitor.visitIn} / {@code visitBetween}) rejects cross-type lists + * at translation time by throwing {@link org.opensearch.sql.exception.SemanticCheckException} + * before any post-analysis rule can run. Covering those cases requires a separate change to the + * PPL visitor (out of scope for this post-analysis rule). + */ +public class DatetimeUdtLiteralCoercionRuleTest extends UnifiedQueryTestBase + implements ResultSetAssertion { + + private UnifiedQueryCompiler compiler; + + @Before + public void setUp() { + super.setUp(); + compiler = new UnifiedQueryCompiler(context); + } + + @Override + protected Table createEmployeesTable() { + return SimpleTable.builder() + .col("name", SqlTypeName.VARCHAR) + .col("hire_date", SqlTypeName.DATE) + .col("login_time", SqlTypeName.TIME) + .col("updated_at", SqlTypeName.TIMESTAMP) + .row( + new Object[] { + "Alice", + (int) LocalDate.of(2020, 3, 15).toEpochDay(), + (int) (LocalTime.of(9, 30).toNanoOfDay() / 1_000_000), + 1705312200000L + }) + .row( + new Object[] { + "Bob", + (int) LocalDate.of(2022, 6, 1).toEpochDay(), + (int) (LocalTime.of(18, 45).toNanoOfDay() / 1_000_000), + 1735689600000L + }) + .build(); + } + + private ResultSet planAndExecute(String query) throws Exception { + PreparedStatement stmt = compiler.compile(planner.plan(query)); + return stmt.executeQuery(); + } + + /** Case 1a: greater-than between DATE column and VARCHAR literal. */ + @Test + public void compareDateColWithStringLiteral() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees | where hire_date > '2021-01-01' | fields name, hire_date"); + verify(rs) + .expectSchema(col("name", VARCHAR), col("hire_date", VARCHAR)) + .expectData(row("Bob", "2022-06-01")); + } + + /** Case 1b: less-than between TIME column and VARCHAR literal. */ + @Test + public void compareTimeColWithStringLiteral() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees | where login_time < '12:00:00' | fields name"); + verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Alice")); + } + + /** Case 1c: greater-than-or-equal between TIMESTAMP column and VARCHAR literal. */ + @Test + public void compareTimestampColWithStringLiteral() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees" + + " | where updated_at >= '2025-01-01 00:00:00'" + + " | fields name"); + verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Bob")); + } + + /** Case 2: equality between DATE column and VARCHAR literal. */ + @Test + public void equalsDateColWithStringLiteral() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees | where hire_date = '2020-03-15' | fields name"); + verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Alice")); + } + + /** Case 3: {@code case()} condition comparing DATE column to VARCHAR literal. */ + @Test + public void caseConditionOnDateColAndStringLiteral() throws Exception { + ResultSet rs = + planAndExecute( + "source = catalog.employees" + + " | eval bucket = case(hire_date > '2021-01-01', 'new', true, 'old')" + + " | fields name, bucket"); + verify(rs) + .expectSchema(col("name", VARCHAR), col("bucket", VARCHAR)) + .expectData(row("Alice", "old"), row("Bob", "new")); + } + + /** Negative: comparison between two DATE columns is untouched (common type already). */ + @Test + public void compareTwoDateColsUnaffected() throws Exception { + ResultSet rs = + planAndExecute("source = catalog.employees | where hire_date = hire_date | fields name"); + verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Alice"), row("Bob")); + } + + /** Negative: VARCHAR-VARCHAR comparison is untouched (no datetime operand). */ + @Test + public void compareVarcharColsUnaffected() throws Exception { + ResultSet rs = + planAndExecute("source = catalog.employees | where name = 'Alice' | fields name"); + verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Alice")); + } +} From 9887cf56fa5a759bf3045c948b51d2090b4d6de3 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 29 Apr 2026 16:15:31 -0700 Subject: [PATCH 3/3] style(api): Apply spotless formatting to coercion rule test Signed-off-by: Chen Dai --- .../datetime/DatetimeUdtLiteralCoercionRuleTest.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java index 5e991251a16..caa51b85a4f 100644 --- a/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java +++ b/api/src/test/java/org/opensearch/sql/api/spec/datetime/DatetimeUdtLiteralCoercionRuleTest.java @@ -29,8 +29,8 @@ *

Note: {@code IN} and {@code BETWEEN} are intentionally NOT covered here. The PPL AST→Rex * visitor ({@code CalciteRexNodeVisitor.visitIn} / {@code visitBetween}) rejects cross-type lists * at translation time by throwing {@link org.opensearch.sql.exception.SemanticCheckException} - * before any post-analysis rule can run. Covering those cases requires a separate change to the - * PPL visitor (out of scope for this post-analysis rule). + * before any post-analysis rule can run. Covering those cases requires a separate change to the PPL + * visitor (out of scope for this post-analysis rule). */ public class DatetimeUdtLiteralCoercionRuleTest extends UnifiedQueryTestBase implements ResultSetAssertion { @@ -87,8 +87,7 @@ public void compareDateColWithStringLiteral() throws Exception { @Test public void compareTimeColWithStringLiteral() throws Exception { ResultSet rs = - planAndExecute( - "source = catalog.employees | where login_time < '12:00:00' | fields name"); + planAndExecute("source = catalog.employees | where login_time < '12:00:00' | fields name"); verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Alice")); } @@ -107,8 +106,7 @@ public void compareTimestampColWithStringLiteral() throws Exception { @Test public void equalsDateColWithStringLiteral() throws Exception { ResultSet rs = - planAndExecute( - "source = catalog.employees | where hire_date = '2020-03-15' | fields name"); + planAndExecute("source = catalog.employees | where hire_date = '2020-03-15' | fields name"); verify(rs).expectSchema(col("name", VARCHAR)).expectData(row("Alice")); }