diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 65056aecbad..4e01cf1e965 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -66,6 +66,7 @@ import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; +import org.opensearch.sql.ast.tree.Convert; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.Expand; @@ -524,6 +525,11 @@ public LogicalPlan visitEval(Eval node, AnalysisContext context) { return new LogicalEval(child, expressionsBuilder.build()); } + @Override + public LogicalPlan visitConvert(Convert node, AnalysisContext context) { + throw getOnlyForCalciteException("convert"); + } + @Override public LogicalPlan visitAddTotals(AddTotals node, AnalysisContext context) { throw getOnlyForCalciteException("addtotals"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a6ef5e7547a..d0603ff3a38 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -54,6 +54,7 @@ import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; +import org.opensearch.sql.ast.tree.Convert; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.Expand; @@ -410,6 +411,10 @@ public T visitFillNull(FillNull fillNull, C context) { return visitChildren(fillNull, context); } + public T visitConvert(Convert node, C context) { + return visitChildren(node, context); + } + public T visitPatterns(Patterns patterns, C context) { return 
visitChildren(patterns, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java index eff567ea498..7b080ac57fc 100644 --- a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java @@ -34,6 +34,7 @@ import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; +import org.opensearch.sql.ast.tree.Convert; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.Expand; @@ -605,6 +606,28 @@ public Node visitExpand(Expand node, FieldResolutionContext context) { return node; } + @Override + public Node visitConvert(Convert node, FieldResolutionContext context) { + Set inputFields = new HashSet<>(); + Set outputFields = new HashSet<>(); + + for (Let conversion : node.getConversions()) { + outputFields.add(conversion.getVar().getField().toString()); + inputFields.addAll(extractFieldsFromExpression(conversion.getExpression())); + } + + FieldResolutionResult currentReq = context.getCurrentRequirements(); + Set upstreamRequiredFields = new HashSet<>(currentReq.getRegularFields()); + upstreamRequiredFields.removeAll(outputFields); + upstreamRequiredFields.addAll(inputFields); + + context.pushRequirements( + new FieldResolutionResult(upstreamRequiredFields, currentReq.getWildcard())); + visitChildren(node, context); + context.popRequirements(); + return node; + } + private Set extractFieldsFromAggregation(UnresolvedExpression expr) { Set fields = new HashSet<>(); if (expr instanceof Alias alias) { diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java b/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java new file mode 100644 index 00000000000..74406b0daf2 --- /dev/null +++ 
b/core/src/main/java/org/opensearch/sql/ast/tree/Convert.java @@ -0,0 +1,43 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Let; + +/** AST node representing the Convert command. */ +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class Convert extends UnresolvedPlan { + private final List conversions; + private UnresolvedPlan child; + + @Override + public Convert attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitConvert(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f1bc5fd6a0d..b6620851adc 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -33,6 +33,7 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -87,6 +88,7 @@ import org.opensearch.sql.ast.expression.Argument.ArgumentMap; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Function; +import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; import 
org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.PatternMethod; @@ -107,6 +109,7 @@ import org.opensearch.sql.ast.tree.Bin; import org.opensearch.sql.ast.tree.Chart; import org.opensearch.sql.ast.tree.CloseCursor; +import org.opensearch.sql.ast.tree.Convert; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.Expand; @@ -928,6 +931,117 @@ public RelNode visitEval(Eval node, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitConvert(Convert node, CalcitePlanContext context) { + visitChildren(node, context); + + if (node.getConversions() == null || node.getConversions().isEmpty()) { + return context.relBuilder.peek(); + } + + ConversionState state = new ConversionState(); + + for (Let conversion : node.getConversions()) { + processConversion(conversion, state, context); + } + + return buildConversionProjection(state, context); + } + + private static class ConversionState { + final Map replacements = new HashMap<>(); + final List> additions = new ArrayList<>(); + final Set seenFields = new HashSet<>(); + } + + private void processConversion( + Let conversion, ConversionState state, CalcitePlanContext context) { + String target = conversion.getVar().getField().toString(); + UnresolvedExpression expression = conversion.getExpression(); + + if (expression instanceof Field) { + processFieldCopyConversion(target, (Field) expression, state, context); + } else if (expression instanceof Function) { + processFunctionConversion(target, (Function) expression, state, context); + } else { + throw new SemanticCheckException("Convert command requires function call expressions"); + } + } + + private void processFieldCopyConversion( + String target, Field field, ConversionState state, CalcitePlanContext context) { + String source = field.getField().toString(); + + if (state.seenFields.contains(source)) { + throw new 
SemanticCheckException( + String.format("Field '%s' cannot be converted more than once", source)); + } + state.seenFields.add(source); + + if (!target.equals(source)) { + RexNode sourceField = context.relBuilder.field(source); + state.additions.add(Pair.of(target, context.relBuilder.alias(sourceField, target))); + } + } + + private void processFunctionConversion( + String target, Function function, ConversionState state, CalcitePlanContext context) { + String functionName = function.getFuncName(); + List args = function.getFuncArgs(); + + if (args.size() != 1 || !(args.get(0) instanceof Field)) { + throw new SemanticCheckException("Convert function must have exactly one field argument"); + } + + String source = ((Field) args.get(0)).getField().toString(); + + if (state.seenFields.contains(source)) { + throw new SemanticCheckException( + String.format("Field '%s' cannot be converted more than once", source)); + } + state.seenFields.add(source); + + RexNode sourceField = context.relBuilder.field(source); + RexNode convertCall = + PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, functionName, sourceField); + + if (!target.equals(source)) { + state.additions.add(Pair.of(target, context.relBuilder.alias(convertCall, target))); + } else { + state.replacements.put(source, context.relBuilder.alias(convertCall, source)); + } + } + + private RelNode buildConversionProjection(ConversionState state, CalcitePlanContext context) { + List originalFields = context.relBuilder.peek().getRowType().getFieldNames(); + List projectList = new ArrayList<>(); + + for (String fieldName : originalFields) { + projectList.add( + state.replacements.getOrDefault(fieldName, context.relBuilder.field(fieldName))); + } + + Set addedAsNames = new HashSet<>(); + for (Pair addition : state.additions) { + String asName = addition.getLeft(); + + if (originalFields.contains(asName)) { + throw new SemanticCheckException( + String.format("AS name '%s' conflicts with existing field", asName)); + } + if 
(addedAsNames.contains(asName)) { + throw new SemanticCheckException( + String.format("AS name '%s' is used multiple times in convert", asName)); + } + + addedAsNames.add(asName); + projectList.add(addition.getRight()); + } + + context.relBuilder.project(projectList); + return context.relBuilder.peek(); + } + private void projectPlusOverriding( List newFields, List newNames, CalcitePlanContext context) { List originalFieldNames = context.relBuilder.peek().getRowType().getFieldNames(); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 50f88d47baf..d1c1365e464 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -300,6 +300,18 @@ public enum BuiltinFunctionName { INTERVAL(FunctionName.of("interval")), + /** PPL Convert Command Functions. */ + AUTO(FunctionName.of("auto")), + NUM(FunctionName.of("num")), + CTIME(FunctionName.of("ctime")), + MKTIME(FunctionName.of("mktime")), + DUR2SEC(FunctionName.of("dur2sec")), + MEMK(FunctionName.of("memk")), + MSTIME(FunctionName.of("mstime")), + RMUNIT(FunctionName.of("rmunit")), + RMCOMMA(FunctionName.of("rmcomma")), + NONE(FunctionName.of("none")), + /** Data Type Convert Function. 
*/ CAST_TO_STRING(FunctionName.of("cast_to_string")), CAST_TO_BYTE(FunctionName.of("cast_to_byte")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 3810352cbfd..e35d7c116a0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -63,12 +63,17 @@ import org.opensearch.sql.expression.function.jsonUDF.JsonFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonKeysFunctionImpl; import org.opensearch.sql.expression.function.jsonUDF.JsonSetFunctionImpl; +import org.opensearch.sql.expression.function.udf.AutoConvertFunction; import org.opensearch.sql.expression.function.udf.CryptographicFunction; +import org.opensearch.sql.expression.function.udf.MemkConvertFunction; +import org.opensearch.sql.expression.function.udf.NumConvertFunction; import org.opensearch.sql.expression.function.udf.ParseFunction; import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction; import org.opensearch.sql.expression.function.udf.RexExtractFunction; import org.opensearch.sql.expression.function.udf.RexExtractMultiFunction; import org.opensearch.sql.expression.function.udf.RexOffsetFunction; +import org.opensearch.sql.expression.function.udf.RmcommaConvertFunction; +import org.opensearch.sql.expression.function.udf.RmunitConvertFunction; import org.opensearch.sql.expression.function.udf.SpanFunction; import org.opensearch.sql.expression.function.udf.ToNumberFunction; import org.opensearch.sql.expression.function.udf.ToStringFunction; @@ -421,6 +426,14 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { new NumberToStringFunction().toUDF("NUMBER_TO_STRING"); public static final SqlOperator TONUMBER = new ToNumberFunction().toUDF("TONUMBER"); public static final SqlOperator 
TOSTRING = new ToStringFunction().toUDF("TOSTRING"); + + // PPL Convert command functions + public static final SqlOperator AUTO = new AutoConvertFunction().toUDF("AUTO"); + public static final SqlOperator NUM = new NumConvertFunction().toUDF("NUM"); + public static final SqlOperator RMCOMMA = new RmcommaConvertFunction().toUDF("RMCOMMA"); + public static final SqlOperator RMUNIT = new RmunitConvertFunction().toUDF("RMUNIT"); + public static final SqlOperator MEMK = new MemkConvertFunction().toUDF("MEMK"); + public static final SqlOperator WIDTH_BUCKET = new org.opensearch.sql.expression.function.udf.binning.WidthBucketFunction() .toUDF("WIDTH_BUCKET"); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 2d594c48f55..dd5ae38a686 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -22,6 +22,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ATAN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ATAN2; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.AUTO; import static org.opensearch.sql.expression.function.BuiltinFunctionName.AVG; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CBRT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.CEIL; @@ -136,6 +137,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEMK; import static 
org.opensearch.sql.expression.function.BuiltinFunctionName.MICROSECOND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MINSPAN_BUCKET; @@ -162,6 +164,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOW; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NULLIF; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.NUM; import static org.opensearch.sql.expression.function.BuiltinFunctionName.OR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.PERCENTILE_APPROX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.PERIOD_ADD; @@ -185,6 +188,8 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_OFFSET; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RINT; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RMCOMMA; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.RMUNIT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RTRIM; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SCALAR_MAX; @@ -983,6 +988,14 @@ void populate() { registerOperator(INTERNAL_PATTERN_PARSER, PPLBuiltinOperators.PATTERN_PARSER); registerOperator(TONUMBER, PPLBuiltinOperators.TONUMBER); registerOperator(TOSTRING, PPLBuiltinOperators.TOSTRING); + + // Register PPL Convert command functions + registerOperator(AUTO, PPLBuiltinOperators.AUTO); + registerOperator(NUM, PPLBuiltinOperators.NUM); + registerOperator(RMCOMMA, PPLBuiltinOperators.RMCOMMA); + registerOperator(RMUNIT, 
PPLBuiltinOperators.RMUNIT); + registerOperator(MEMK, PPLBuiltinOperators.MEMK); + register( TOSTRING, (FunctionImp1) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/AutoConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/AutoConvertFunction.java new file mode 100644 index 00000000000..15f2a4bb457 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/AutoConvertFunction.java @@ -0,0 +1,32 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +/** PPL auto() conversion function. */ +public class AutoConvertFunction extends BaseConversionUDF { + + public AutoConvertFunction() { + super(AutoConvertFunction.class); + } + + public static Object convert(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + + String str = ConversionUtils.preprocessValue(value); + if (str == null) { + return null; + } + + Double result = ConversionUtils.tryConvertMemoryUnit(str); + if (result != null) { + return result; + } + + return NumConvertFunction.convert(value); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/BaseConversionUDF.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/BaseConversionUDF.java new file mode 100644 index 00000000000..1836e0c9517 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/BaseConversionUDF.java @@ -0,0 +1,72 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.util.List; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import 
org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.calcite.utils.PPLOperandTypes; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** Base class for PPL conversion functions. */ +public abstract class BaseConversionUDF extends ImplementorUDF { + + protected BaseConversionUDF(Class functionClass) { + super(new ConversionImplementor(functionClass), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + return ReturnTypes.explicit( + factory -> + factory.createTypeWithNullability(factory.createSqlType(SqlTypeName.DOUBLE), true)); + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return PPLOperandTypes.OPTIONAL_ANY; + } + + public static class ConversionImplementor implements NotNullImplementor { + private final Class functionClass; + + public ConversionImplementor(Class functionClass) { + this.functionClass = functionClass; + } + + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + if (translatedOperands.isEmpty()) { + return Expressions.call( + ConversionImplementor.class, + "toDoubleOrNull", + Expressions.constant(null, Object.class)); + } + + Expression fieldValue = translatedOperands.get(0); + + Expression result = Expressions.call(functionClass, "convert", Expressions.box(fieldValue)); + + return Expressions.call(ConversionImplementor.class, "toDoubleOrNull", result); + } + + public static Double toDoubleOrNull(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + return null; + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ConversionUtils.java 
b/core/src/main/java/org/opensearch/sql/expression/function/udf/ConversionUtils.java new file mode 100644 index 00000000000..675e996cbb2 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ConversionUtils.java @@ -0,0 +1,98 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import lombok.extern.log4j.Log4j2; + +@Log4j2 +public class ConversionUtils { + + private ConversionUtils() {} + + public static final Pattern COMMA_PATTERN = Pattern.compile(","); + public static final Pattern LEADING_NUMBER_WITH_UNIT_PATTERN = + Pattern.compile("^([+-]?(?:\\d+\\.?\\d*|\\.\\d+)(?:[eE][+-]?\\d+)?)(.*)$"); + public static final Pattern CONTAINS_LETTER_PATTERN = Pattern.compile(".*[a-zA-Z].*"); + public static final Pattern STARTS_WITH_SIGN_OR_DIGIT = Pattern.compile("^[+-]?[\\d.].*"); + public static final Pattern MEMK_PATTERN = Pattern.compile("^([+-]?\\d+\\.?\\d*)([kmgKMG])?$"); + + public static final double MB_TO_KB = 1024.0; + public static final double GB_TO_KB = 1024.0 * 1024.0; + + public static String preprocessValue(Object value) { + if (value == null) { + return null; + } + String str = value instanceof String ? ((String) value).trim() : value.toString().trim(); + return str.isEmpty() ? 
null : str; + } + + public static Double tryParseDouble(String str) { + try { + return Double.parseDouble(str); + } catch (NumberFormatException e) { + log.debug("Failed to parse '{}' as number", str, e); + return null; + } + } + + public static String extractLeadingNumber(String str) { + Matcher matcher = LEADING_NUMBER_WITH_UNIT_PATTERN.matcher(str); + if (matcher.matches()) { + return matcher.group(1); + } + return null; + } + + public static Double tryConvertWithCommaRemoval(String str) { + String noCommas = COMMA_PATTERN.matcher(str).replaceAll(""); + return tryParseDouble(noCommas); + } + + public static boolean isPotentiallyConvertible(String str) { + return STARTS_WITH_SIGN_OR_DIGIT.matcher(str).matches(); + } + + public static boolean hasValidUnitSuffix(String str, String leadingNumber) { + if (leadingNumber == null || leadingNumber.length() >= str.length()) { + return false; + } + String suffix = str.substring(leadingNumber.length()).trim(); + if (suffix.isEmpty()) { + return false; + } + char firstChar = suffix.charAt(0); + return !Character.isDigit(firstChar) && firstChar != '.'; + } + + public static Double tryConvertMemoryUnit(String str) { + Matcher matcher = MEMK_PATTERN.matcher(str); + if (!matcher.matches()) { + return null; + } + + Double number = tryParseDouble(matcher.group(1)); + if (number == null) { + return null; + } + + String unit = matcher.group(2); + if (unit == null || unit.equalsIgnoreCase("k")) { + return number; + } + + double multiplier = + switch (unit.toLowerCase()) { + case "m" -> MB_TO_KB; + case "g" -> GB_TO_KB; + default -> 1.0; + }; + + return number * multiplier; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/MemkConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/MemkConvertFunction.java new file mode 100644 index 00000000000..3154e8ce046 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/MemkConvertFunction.java @@ -0,0 
+1,27 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +/** PPL memk() conversion function. */ +public class MemkConvertFunction extends BaseConversionUDF { + + public MemkConvertFunction() { + super(MemkConvertFunction.class); + } + + public static Object convert(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + + String str = ConversionUtils.preprocessValue(value); + if (str == null) { + return null; + } + + return ConversionUtils.tryConvertMemoryUnit(str); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/NumConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/NumConvertFunction.java new file mode 100644 index 00000000000..8da8a9fbb31 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/NumConvertFunction.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +/** PPL num() conversion function. 
*/ +public class NumConvertFunction extends BaseConversionUDF { + + public NumConvertFunction() { + super(NumConvertFunction.class); + } + + public static Object convert(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + + String str = ConversionUtils.preprocessValue(value); + if (str == null || !ConversionUtils.isPotentiallyConvertible(str)) { + return null; + } + + Double result = ConversionUtils.tryParseDouble(str); + if (result != null) { + return result; + } + + if (str.contains(",")) { + result = ConversionUtils.tryConvertWithCommaRemoval(str); + if (result != null) { + return result; + } + } + + String leadingNumber = ConversionUtils.extractLeadingNumber(str); + if (ConversionUtils.hasValidUnitSuffix(str, leadingNumber)) { + return ConversionUtils.tryParseDouble(leadingNumber); + } + + return null; + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/RmcommaConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/RmcommaConvertFunction.java new file mode 100644 index 00000000000..c6295495313 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/RmcommaConvertFunction.java @@ -0,0 +1,31 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +/** PPL rmcomma() conversion function. 
*/ +public class RmcommaConvertFunction extends BaseConversionUDF { + + public RmcommaConvertFunction() { + super(RmcommaConvertFunction.class); + } + + public static Object convert(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + + String str = ConversionUtils.preprocessValue(value); + if (str == null) { + return null; + } + + if (ConversionUtils.CONTAINS_LETTER_PATTERN.matcher(str).matches()) { + return null; + } + + return ConversionUtils.tryConvertWithCommaRemoval(str); + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/RmunitConvertFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/RmunitConvertFunction.java new file mode 100644 index 00000000000..0b1cde6fe15 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/RmunitConvertFunction.java @@ -0,0 +1,28 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +/** PPL rmunit() conversion function. */ +public class RmunitConvertFunction extends BaseConversionUDF { + + public RmunitConvertFunction() { + super(RmunitConvertFunction.class); + } + + public static Object convert(Object value) { + if (value instanceof Number) { + return ((Number) value).doubleValue(); + } + + String str = ConversionUtils.preprocessValue(value); + if (str == null) { + return null; + } + + String numberStr = ConversionUtils.extractLeadingNumber(str); + return numberStr != null ? 
ConversionUtils.tryParseDouble(numberStr) : null; + } +} diff --git a/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionUtilsTest.java b/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionUtilsTest.java new file mode 100644 index 00000000000..eb62cac86ee --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/expression/function/udf/ConversionUtilsTest.java @@ -0,0 +1,247 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.expression.function.udf; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import org.junit.jupiter.api.Test; + +/** Unit tests for conversion functions. */ +public class ConversionUtilsTest { + + // auto() Function Tests + @Test + public void testAutoConvertWithCommas() { + assertEquals(1234.0, AutoConvertFunction.convert("1,234")); + assertEquals(1234.56, AutoConvertFunction.convert("1,234.56")); + assertEquals(1000000.0, AutoConvertFunction.convert("1,000,000")); + } + + @Test + public void testAutoConvertWithUnits() { + assertEquals(123.0, AutoConvertFunction.convert("123 dollars")); + assertEquals(45.67, AutoConvertFunction.convert("45.67 kg")); + assertEquals(100.0, AutoConvertFunction.convert("100ms")); + assertEquals(2.0, AutoConvertFunction.convert("2,12.0 sec")); + } + + @Test + public void testAutoConvertWithMemorySizes() { + assertEquals(100.0, AutoConvertFunction.convert("100k")); + assertEquals(51200.0, AutoConvertFunction.convert("50m")); + assertEquals(2097152.0, AutoConvertFunction.convert("2g")); + assertEquals(100.0, AutoConvertFunction.convert("100")); + assertEquals(-100.0, AutoConvertFunction.convert("-100k")); + } + + @Test + public void testAutoConvertCombined() { + assertEquals(1.0, AutoConvertFunction.convert("1,234 dollars")); + assertEquals(5.0, AutoConvertFunction.convert("5,678.90 USD")); + } + + @Test + public void 
testAutoConvertComplexCommaPatterns() { + assertEquals(2.0, AutoConvertFunction.convert("2.000")); + assertEquals(22324.0, AutoConvertFunction.convert("2232,4.000,000")); + assertEquals(2232.0, AutoConvertFunction.convert("2232,4.000,000AAAAA")); + } + + @Test + public void testAutoConvertStringsStartingWithLetters() { + assertNull(AutoConvertFunction.convert("AAAA2.000")); + assertNull(AutoConvertFunction.convert("AAAA2.000,000")); + } + + @Test + public void testAutoConvertNullAndEmpty() { + assertNull(AutoConvertFunction.convert((Object) null)); + assertNull(AutoConvertFunction.convert("")); + assertNull(AutoConvertFunction.convert(" ")); + } + + @Test + public void testAutoConvertInvalid() { + assertNull(AutoConvertFunction.convert("abc")); + assertNull(AutoConvertFunction.convert("no numbers here")); + } + + @Test + public void testAutoConvertWithSpacedMemoryUnits() { + assertEquals(123.0, AutoConvertFunction.convert("123 K")); + assertEquals(123.0, AutoConvertFunction.convert("123 M")); + assertEquals(123.0, AutoConvertFunction.convert("123 G")); + assertEquals(50.5, AutoConvertFunction.convert("50.5 m")); + } + + // num() Function Tests + @Test + public void testNumConvert() { + assertEquals(123.0, NumConvertFunction.convert("123")); + assertEquals(123.45, NumConvertFunction.convert("123.45")); + assertEquals(1234.0, NumConvertFunction.convert("1,234")); + assertEquals(123.0, NumConvertFunction.convert("123 dollars")); + } + + @Test + public void testNumConvertWithUnits() { + assertEquals(212.0, NumConvertFunction.convert("212 sec")); + assertNull(NumConvertFunction.convert("no numbers")); + } + + @Test + public void testNumConvertWithCommasAndUnits() { + assertEquals(212.04, NumConvertFunction.convert("212.04,54545 AAA")); + assertEquals(2.0, NumConvertFunction.convert(" 2,12.0 AAA")); + assertNull(NumConvertFunction.convert("AAAA2,12.0 AAA")); + assertEquals(345445.0, NumConvertFunction.convert("34,54,45")); + } + + @Test + public void 
testNumConvertWithSpacedMemoryUnits() { + // num() extracts numbers from strings with spaced units + assertEquals(123.0, NumConvertFunction.convert("123 K")); + assertEquals(123.0, NumConvertFunction.convert("123 M")); + assertEquals(123.0, NumConvertFunction.convert("123 G")); + assertEquals(50.5, NumConvertFunction.convert("50.5 m")); + } + + // rmcomma() Function Tests + @Test + public void testRmcommaConvert() { + assertEquals(1234.0, RmcommaConvertFunction.convert("1,234")); + assertEquals(1234567.89, RmcommaConvertFunction.convert("1,234,567.89")); + assertEquals(1234.0, RmcommaConvertFunction.convert("1234")); + assertNull(RmcommaConvertFunction.convert("abc,123")); + assertNull(RmcommaConvertFunction.convert("")); + assertNull(RmcommaConvertFunction.convert(null)); + } + + @Test + public void testRmcommaConvertVariations() { + assertNull(RmcommaConvertFunction.convert("abc")); + assertNull(RmcommaConvertFunction.convert("AAA3454,45")); + } + + @Test + public void testRmcommaConvertWithSpacedMemoryUnits() { + assertNull(RmcommaConvertFunction.convert("123 K")); + assertNull(RmcommaConvertFunction.convert("123 M")); + assertNull(RmcommaConvertFunction.convert("123 G")); + assertNull(RmcommaConvertFunction.convert("50.5 m")); + } + + // rmunit() Function Tests + @Test + public void testRmunitConvert() { + assertNull(RmunitConvertFunction.convert("no numbers")); + } + + @Test + public void testRmunitConvertEdgeCases() { + assertEquals(2.0, RmunitConvertFunction.convert("2.000 sec")); + assertEquals(2.0, RmunitConvertFunction.convert("2\\ sec")); + assertNull(RmunitConvertFunction.convert("AAAA2\\ sec")); + assertEquals(2.0, RmunitConvertFunction.convert(" 2.000,7878789\\ sec")); + assertEquals(34.0, RmunitConvertFunction.convert("34,54,45")); + } + + @Test + public void testRmunitConvertWithSpacedMemoryUnits() { + assertEquals(123.0, RmunitConvertFunction.convert("123 K")); + assertEquals(123.0, RmunitConvertFunction.convert("123 M")); + assertEquals(123.0, 
RmunitConvertFunction.convert("123 G")); + assertEquals(50.5, RmunitConvertFunction.convert("50.5 m")); + } + + // memk() Function Tests + @Test + public void testMemkConvert() { + assertEquals(100.0, MemkConvertFunction.convert("100")); + assertEquals(100.0, MemkConvertFunction.convert(100)); + assertEquals(100.5, MemkConvertFunction.convert("100.5")); + + assertEquals(100.0, MemkConvertFunction.convert("100k")); + assertEquals(100.0, MemkConvertFunction.convert("100K")); + + assertEquals(51200.0, MemkConvertFunction.convert("50m")); + assertEquals(51200.0, MemkConvertFunction.convert("50M")); + assertEquals(102912.0, MemkConvertFunction.convert("100.5m")); + + assertEquals(2097152.0, MemkConvertFunction.convert("2g")); + assertEquals(2097152.0, MemkConvertFunction.convert("2G")); + assertEquals(1.5 * 1024 * 1024, MemkConvertFunction.convert("1.5g")); + + assertEquals(-100.0, MemkConvertFunction.convert("-100")); + assertEquals(-51200.0, MemkConvertFunction.convert("-50m")); + assertEquals(-2097152.0, MemkConvertFunction.convert("-2g")); + assertEquals(-100.0, MemkConvertFunction.convert("-100k")); + + assertEquals(100.0, MemkConvertFunction.convert("+100")); + assertEquals(51200.0, MemkConvertFunction.convert("+50m")); + + assertNull(MemkConvertFunction.convert("abc")); + assertNull(MemkConvertFunction.convert("100x")); + assertNull(MemkConvertFunction.convert("100 gb")); + assertNull(MemkConvertFunction.convert("")); + assertNull(MemkConvertFunction.convert(null)); + assertNull(MemkConvertFunction.convert(" ")); + + assertNull(MemkConvertFunction.convert("100 k")); + assertNull(MemkConvertFunction.convert("50 m")); + assertNull(MemkConvertFunction.convert("2 g")); + + assertNull(MemkConvertFunction.convert("abc100m")); + assertNull(MemkConvertFunction.convert("test50k")); + assertNull(MemkConvertFunction.convert("memory2g")); + } + + // Cross-Function Tests + @Test + public void testScientificNotation() { + assertEquals(100000.0, 
NumConvertFunction.convert("1e5")); + assertEquals(100000.0, AutoConvertFunction.convert("1e5")); + assertEquals(1.23e-4, NumConvertFunction.convert("1.23e-4")); + assertEquals(1.23e-4, AutoConvertFunction.convert("1.23e-4")); + assertEquals(100000.0, NumConvertFunction.convert("1e5 meters")); + assertEquals(100000.0, RmunitConvertFunction.convert("1e5 meters")); + } + + @Test + public void testSpecialValues() { + assertNull(NumConvertFunction.convert("∞")); + assertNull(AutoConvertFunction.convert("∞")); + assertNull(NumConvertFunction.convert("Infinity")); + assertNull(AutoConvertFunction.convert("Infinity")); + assertNull(NumConvertFunction.convert("NaN")); + assertNull(AutoConvertFunction.convert("NaN")); + } + + @Test + public void testNegativeNumbers() { + assertEquals(-123.0, NumConvertFunction.convert("-123")); + assertEquals(-123.45, AutoConvertFunction.convert("-123.45")); + assertEquals(-1234.0, RmcommaConvertFunction.convert("-1,234")); + assertEquals(-100.0, RmunitConvertFunction.convert("-100km")); + } + + @Test + public void testLeadingPlusSign() { + assertEquals(123.0, NumConvertFunction.convert("+123")); + assertEquals(123.45, AutoConvertFunction.convert("+123.45")); + assertEquals(100.0, RmunitConvertFunction.convert("+100km")); + assertEquals(1234.0, RmcommaConvertFunction.convert("+1,234")); + } + + @Test + public void testMalformedNumbers() { + assertNull(NumConvertFunction.convert("1.2.3")); + assertNull(AutoConvertFunction.convert("1.2.3")); + assertEquals(1234.0, NumConvertFunction.convert("1,,234")); + assertEquals(1234.0, AutoConvertFunction.convert("1,,234")); + } +} diff --git a/docs/user/ppl/cmd/convert.md b/docs/user/ppl/cmd/convert.md new file mode 100644 index 00000000000..5ca9d4c3665 --- /dev/null +++ b/docs/user/ppl/cmd/convert.md @@ -0,0 +1,256 @@ +# convert + +The `convert` command applies conversion functions to transform field values into different data types and formats. 
+ +## Syntax + +The `convert` command has the following syntax: + +```syntax +convert <conversion-function>(<field>) [AS <new-field>] [, <conversion-function>(<field>) [AS <new-field>]]... +``` + +## Parameters + +The `convert` command supports the following parameters. + +| Parameter | Required/Optional | Description | +| --- | --- | --- | +| `<conversion-function>` | Required | One of the conversion functions: `auto()`, `num()`, `rmcomma()`, `rmunit()`, `memk()`, or `none()`. | +| `<field>` | Required | Single field name to convert, or `*` to convert all fields. | +| `AS <new-field>` | Optional | Create new field with converted value, preserving original field. | + +## Conversion Functions + +| Function | Description | +| --- | --- | +| `auto(field)` | Automatically converts fields to numbers using intelligent conversion. Handles memory sizes (k/m/g), commas, units, and scientific notation. Returns `null` for non-convertible values. | +| `num(field)` | Extracts leading numbers from strings. For strings without letters: removes commas as thousands separators. For strings with letters: extracts leading number, stops at letters or commas. Returns `null` for non-convertible values. | +| `rmcomma(field)` | Removes commas from field values and converts to a number. Returns `null` if the value contains letters. | +| `rmunit(field)` | Extracts leading numeric values from strings. Stops at the first non-numeric character (including commas). Returns `null` for non-convertible values. | +| `memk(field)` | Converts memory size strings to kilobytes. Accepts numbers with optional k/m/g suffix (case-insensitive). Default unit is kilobytes. Returns `null` for invalid formats. | +| `none(field)` | No-op function that preserves the original field value. Used for excluding specific fields from wildcard conversions. 
| + +## Example 1: Basic auto() conversion + +The following query converts the `balance` field to a number using the `auto()` function: + +```ppl +source=accounts +| convert auto(balance) +| fields account_number, balance +| head 3 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+---------+ +| account_number | balance | +|----------------+---------| +| 1 | 39225.0 | +| 6 | 5686.0 | +| 13 | 32838.0 | ++----------------+---------+ +``` + +## Example 2: Convert with commas using num() + +The following query converts a field containing comma-separated numbers: + +```ppl +source=accounts +| eval price='1,234' +| convert num(price) +| fields price +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++---------+ +| price | +|---------| +| 1234.0 | ++---------+ +``` + +## Example 3: Memory size conversion with memk() + +The following query converts memory size strings to kilobytes: + +```ppl +source=system_metrics +| eval memory='100m' +| convert memk(memory) +| fields memory +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------+ +| memory | +|----------| +| 102400.0 | ++----------+ +``` + +## Example 4: Multiple field conversions + +The following query converts multiple fields using different conversion functions: + +```ppl +source=accounts +| convert auto(balance), num(age) +| fields account_number, balance, age +| head 3 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+---------+------+ +| account_number | balance | age | +|----------------+---------+------| +| 1 | 39225.0 | 32.0 | +| 6 | 5686.0 | 36.0 | +| 13 | 32838.0 | 28.0 | ++----------------+---------+------+ +``` + +## Example 5: Using AS clause to preserve original values + +The following query creates a new field with the converted value while preserving the original: + +```ppl +source=accounts 
+| convert auto(balance) AS balance_num +| fields account_number, balance, balance_num +| head 3 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+---------+-------------+ +| account_number | balance | balance_num | +|----------------+---------+-------------| +| 1 | 39225 | 39225.0 | +| 6 | 5686 | 5686.0 | +| 13 | 32838 | 32838.0 | ++----------------+---------+-------------+ +``` + +## Example 6: Extract numbers from strings with units + +The following query extracts numeric values from strings containing units: + +```ppl +source=metrics +| eval duration='2.000 sec' +| convert rmunit(duration) +| fields duration +``` + +The query returns the following results: + +```text +fetched rows / total rows = 1/1 ++----------+ +| duration | +|----------| +| 2.0 | ++----------+ +``` + +## Example 7: Integration with aggregation functions + +The following query converts values and uses them in aggregations: + +```ppl +source=accounts +| convert auto(balance) +| stats avg(balance) by gender +``` + +The query returns the following results: + +```text +fetched rows / total rows = 2/2 ++--------------+--------+ +| avg(balance) | gender | +|--------------+--------| +| 25208.15 | M | +| 27992.571... | F | ++--------------+--------+ +``` + +## Example 8: Using none() to preserve field values + +The `none()` function acts as a pass-through, returning the field value unchanged. 
This is useful for explicitly preserving fields in multi-field conversions: + +```ppl +source=accounts +| convert auto(balance), num(age), none(account_number) +| fields account_number, balance, age +| head 3 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++----------------+---------+------+ +| account_number | balance | age | +|----------------+---------+------| +| 1 | 39225.0 | 32.0 | +| 6 | 5686.0 | 36.0 | +| 13 | 32838.0 | 28.0 | ++----------------+---------+------+ +``` + +### Using none() with AS for field renaming + +The `none()` function can be combined with the `AS` clause to rename a field without modifying its value: + +```ppl +source=accounts +| convert none(account_number) AS account_id +| fields account_id, firstname, lastname +| head 3 +``` + +The query returns the following results: + +```text +fetched rows / total rows = 3/3 ++------------+-----------+----------+ +| account_id | firstname | lastname | +|------------+-----------+----------| +| 1 | Amber | Duke | +| 6 | Hattie | Bond | +| 13 | Nanette | Bates | ++------------+-----------+----------| +``` + +**Note:** The `none()` function is particularly useful when wildcard support is implemented, allowing you to exclude specific fields from bulk conversions. + +## Notes + +- All conversion functions return `null` for values that cannot be converted to a number +- All numeric conversion functions return double precision numbers to support aggregations +- Converted numbers display with decimal notation (e.g., `1234.0`, `1234.56`) +- Use the `AS` clause to preserve original fields while creating converted versions +- Multiple conversions can be applied in a single command + +## Limitations + +The `convert` command can only work with `plugins.calcite.enabled=true`. + +When Calcite is disabled, attempting to use convert functions will result in an "unsupported function" error. 
diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 30ad7159182..8bd0f46198f 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -43,6 +43,7 @@ source=accounts | [fields command](cmd/fields.md) | 1.0 | stable (since 1.0) | Keep or remove fields from the search result. | | [rename command](cmd/rename.md) | 1.0 | stable (since 1.0) | Rename one or more fields in the search result. | | [eval command](cmd/eval.md) | 1.0 | stable (since 1.0) | Evaluate an expression and append the result to the search result. | +| [convert command](cmd/convert.md) | 3.5 | experimental (since 3.5) | Convert fields to different data types using conversion functions. | | [replace command](cmd/replace.md) | 3.4 | experimental (since 3.4) | Replace text in one or more fields in the search result | | [fillnull command](cmd/fillnull.md) | 3.0 | experimental (since 3.0) | Fill null with provided value in one or more fields in the search result. | | [expand command](cmd/expand.md) | 3.1 | experimental (since 3.1) | Transform a single document into multiple documents by expanding a nested array field. 
| @@ -99,4 +100,4 @@ source=accounts * **Optimization** - [Optimization](../../user/optimization/optimization.rst) * **Limitations** - - [Limitations](limitations/limitations.md) \ No newline at end of file + - [Limitations](limitations/limitations.md) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 22a6f6b5916..61132ecef22 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -23,6 +23,7 @@ CalciteExplainIT.class, CalciteAddTotalsCommandIT.class, CalciteAddColTotalsCommandIT.class, + CalciteConvertCommandIT.class, CalciteArrayFunctionIT.class, CalciteBinCommandIT.class, CalciteConvertTZFunctionIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java new file mode 100644 index 00000000000..1b62eff9ca9 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteConvertCommandIT.java @@ -0,0 +1,263 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** Integration tests for the PPL convert command with Calcite enabled. 
*/ +public class CalciteConvertCommandIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + loadIndex(Index.BANK); + enableCalcite(); + } + + @Test + public void testConvertAutoFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert auto(balance) | fields balance | head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("balance", null, "double")); + verifyDataRows(result, rows(39225.0), rows(5686.0), rows(32838.0)); + } + + @Test + public void testConvertAutoWithStringField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval test_field = '42' | convert auto(test_field) |" + + " fields test_field | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("test_field", null, "double")); + verifyDataRows(result, rows(42.0)); + } + + @Test + public void testConvertNumFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert num(balance) | fields balance | head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("balance", null, "double")); + verifyDataRows(result, rows(39225.0), rows(5686.0), rows(32838.0)); + } + + @Test + public void testConvertWithAlias() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert auto(balance) AS balance_num | fields balance_num |" + + " head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("balance_num", null, "double")); + verifyDataRows(result, rows(39225.0), rows(5686.0), rows(32838.0)); + } + + @Test + public void testConvertMultipleFunctions() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert auto(balance), num(age) | fields balance, age | head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("balance", null, "double"), schema("age", null, "double")); + verifyDataRows(result, 
rows(39225.0, 32.0), rows(5686.0, 36.0), rows(32838.0, 28.0)); + } + + @Test + public void testConvertRmcommaFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval amount = '1,234,567.89' | convert rmcomma(amount) |" + + " fields amount | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("amount", null, "double")); + verifyDataRows(result, rows(1234567.89)); + } + + @Test + public void testConvertRmunitFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval distance = '100km' | convert rmunit(distance) |" + + " fields distance | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("distance", null, "double")); + verifyDataRows(result, rows(100.0)); + } + + @Test + public void testConvertRmunitWithNoNumbers() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval duration = 'no numbers' | convert rmunit(duration) |" + + " fields duration | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("duration", null, "double")); + verifyDataRows(result, rows((Object) null)); + } + + @Test + public void testConvertMemkFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval memory = '100m' | convert memk(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(102400.0)); + } + + @Test + public void testConvertMemkWithDefaultKilobytes() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval memory = '100' | convert memk(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(100.0)); + } + + @Test + public void testConvertMemkWithGigabytes() throws IOException { + JSONObject result = + executeQuery( + 
String.format( + "search source=%s | eval memory = '2g' | convert memk(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(2097152.0)); + } + + @Test + public void testConvertMemkWithNegative() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval memory = '-100m' | convert memk(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(-102400.0)); + } + + @Test + public void testConvertNumWithNoNumbers() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval text = 'no numbers here' | convert num(text) |" + + " fields text | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("text", null, "double")); + verifyDataRows(result, rows((Object) null)); + } + + @Test + public void testConvertRmcommaWithLetters() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval text = 'abc123' | convert rmcomma(text) |" + + " fields text | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("text", null, "double")); + verifyDataRows(result, rows((Object) null)); + } + + @Test + public void testConvertNoneFunction() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert none(account_number) | fields account_number | head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("account_number", null, "bigint")); + verifyDataRows(result, rows(1), rows(6), rows(13)); + } + + @Test + public void testConvertWithWhere() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | where age > 30 | convert auto(balance) | fields balance, age |" + + " head 3", + TEST_INDEX_BANK)); + verifySchema(result, schema("balance", null, "double"), schema("age", null, 
"int")); + verifyNumOfRows(result, 3); + } + + @Test + public void testConvertWithStats() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert auto(balance) | stats avg(balance) by gender", + TEST_INDEX_BANK)); + verifySchema(result, schema("avg(balance)", null, "double"), schema("gender", "string")); + verifyNumOfRows(result, 2); + } + + @Test + public void testConvertAutoWithMemorySizes() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval memory = '100m' | convert auto(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(102400.0)); + } + + @Test + public void testConvertAutoWithMemorySizesKilobytes() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval memory = '100k' | convert auto(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(100.0)); + } + + @Test + public void testConvertAutoWithMemorySizesGigabytes() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | eval memory = '2g' | convert auto(memory) |" + + " fields memory | head 1", + TEST_INDEX_BANK)); + verifySchema(result, schema("memory", null, "double")); + verifyDataRows(result, rows(2097152.0)); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c57b33ab6d8..066cfb712a4 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2315,6 +2315,35 @@ public void testNestedAggExplainWhenPushdownNotApplied() throws Exception { verifyErrorMessageContains(e, 
"Cannot execute nested aggregation on"); } + @Test + public void testConvertCommandExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_command.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | convert auto(balance) | fields balance")); + } + + @Test + public void testConvertWithAliasExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_with_alias.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | convert auto(balance) AS balance_num | fields" + + " balance_num")); + } + + @Test + public void testConvertMultipleFunctionsExplain() throws IOException { + String expected = loadExpectedPlan("explain_convert_multiple.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_bank | convert auto(balance), num(age) | fields" + + " balance, age")); + } + @Test public void testNotBetweenPushDownExplain() throws Exception { // test for issue https://github.com/opensearch-project/sql/issues/4903 diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java new file mode 100644 index 00000000000..099992c9298 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ConvertCommandIT.java @@ -0,0 +1,77 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains; + +import org.junit.jupiter.api.Test; + +/** Integration tests for the PPL convert command when Calcite is disabled. 
*/ +public class ConvertCommandIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + loadIndex(Index.BANK); + } + + @Test + public void testConvertAutoFunction() { + verifyQueryThrowsCalciteError("source=%s | convert auto(balance) | fields balance"); + } + + @Test + public void testConvertAutoWithMixedData() { + verifyQueryThrowsCalciteError( + "source=%s | eval test_field = '42' | convert auto(test_field) | fields test_field"); + } + + @Test + public void testConvertNumFunction() { + verifyQueryThrowsCalciteError("source=%s | convert num(balance) | fields balance"); + } + + @Test + public void testConvertWithAlias() { + verifyQueryThrowsCalciteError( + "source=%s | convert auto(balance) AS balance_num | fields balance_num"); + } + + @Test + public void testConvertMultipleFunctions() { + verifyQueryThrowsCalciteError( + "source=%s | convert auto(balance), num(age) | fields balance, age"); + } + + @Test + public void testConvertRmcommaFunction() { + verifyQueryThrowsCalciteError("source=%s | convert rmcomma(firstname) | fields firstname"); + } + + @Test + public void testConvertNoneFunction() { + verifyQueryThrowsCalciteError( + "source=%s | convert none(account_number) | fields account_number"); + } + + @Test + public void testConvertWithWhere() { + verifyQueryThrowsCalciteError( + "source=%s | where age > 30 | convert auto(balance) | fields balance"); + } + + @Test + public void testConvertWithStats() { + verifyQueryThrowsCalciteError( + "source=%s | convert auto(balance) | stats avg(balance) by gender"); + } + + private void verifyQueryThrowsCalciteError(String query) { + Exception e = + assertThrows(Exception.class, () -> executeQuery(String.format(query, TEST_INDEX_BANK))); + verifyErrorMessageContains(e, "convert is supported only when plugins.calcite.enabled=true"); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java 
b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 15f3c508b14..2852f58f99b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -202,6 +202,17 @@ public void testAddColTotalCommand() throws IOException { } } + @Test + public void testConvertCommand() throws IOException { + JSONObject result; + try { + executeQuery(String.format("search source=%s | convert auto(balance)", TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + verifyQuery(result); + } + } + private void verifyQuery(JSONObject result) throws IOException { if (isCalciteEnabled()) { assertFalse(result.getJSONArray("datarows").isEmpty()); diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java index 1cbd019eca3..ae526f572b3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java @@ -349,4 +349,26 @@ public void testCrossClusterRexWithOffsetField() throws IOException { verifyDataRows( result, rows("Duke Willmington", "u", "vowel=1-1"), rows("Bond", "o", "vowel=1-1")); } + + @Test + public void testCrossClusterConvert() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert auto(balance) | fields balance", + TEST_INDEX_BANK_REMOTE)); + verifyColumn(result, columnName("balance")); + verifySchema(result, schema("balance", "double")); + } + + @Test + public void testCrossClusterConvertWithAlias() throws IOException { + JSONObject result = + executeQuery( + String.format( + "search source=%s | convert auto(balance) AS balance_num | fields balance_num", + TEST_INDEX_BANK_REMOTE)); + 
verifyColumn(result, columnName("balance_num")); + verifySchema(result, schema("balance_num", "double")); + } } diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml new file mode 100644 index 00000000000..16179925565 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_command.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(balance=[AUTO($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0=[{inputs}], expr#1=[AUTO($t0)], balance=[$t1]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml new file mode 100644 index 00000000000..a4940d90124 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_multiple.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(balance=[AUTO($7)], age=[NUM($10)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=[AUTO($t0)], expr#3=[NUM($t1)], balance=[$t2], age=[$t3]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance, age], LIMIT->10000], 
OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml new file mode 100644 index 00000000000..91340ada0ba --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_convert_with_alias.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(balance_num=[AUTO($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableCalc(expr#0=[{inputs}], expr#1=[AUTO($t0)], balance_num=[$t1]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[balance], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["balance"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_command.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_command.yaml new file mode 100644 index 00000000000..fc212d1aae3 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_command.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(balance=[AUTO($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[AUTO($t7)], balance=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git 
a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_multiple.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_multiple.yaml new file mode 100644 index 00000000000..c06990881d8 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_multiple.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(balance=[AUTO($7)], age=[NUM($10)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[AUTO($t7)], expr#20=[NUM($t10)], balance=[$t19], age=[$t20]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_with_alias.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_with_alias.yaml new file mode 100644 index 00000000000..062c5637ca7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_convert_with_alias.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(balance_num=[AUTO($7)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=[AUTO($t7)], balance_num=[$t19]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]]) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 71162e81bd8..562a3cecf39 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -45,6 +45,7 @@ AD: 'AD'; ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; +CONVERT: 'CONVERT'; TRENDLINE: 'TRENDLINE'; 
CHART: 'CHART'; TIMECHART: 'TIMECHART'; @@ -153,6 +154,7 @@ USEOTHER: 'USEOTHER'; OTHERSTR: 'OTHERSTR'; NULLSTR: 'NULLSTR'; TIMEFIELD: 'TIMEFIELD'; +TIMEFORMAT: 'TIMEFORMAT'; INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index e4500ee1e6f..ce04b3ade1a 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -74,6 +74,7 @@ commands | adCommand | mlCommand | fillnullCommand + | convertCommand | trendlineCommand | appendcolCommand | addtotalsCommand @@ -117,6 +118,7 @@ commandName | AD | ML | FILLNULL + | CONVERT | EXPAND | FLATTEN | TRENDLINE @@ -514,6 +516,14 @@ replacementPair : fieldExpression EQUAL replacement = valueExpression ; +convertCommand + : CONVERT (TIMEFORMAT EQUAL timeformatValue = stringLiteral)? convertFunction (COMMA convertFunction)* + ; + +convertFunction + : functionName = ident LT_PRTHS fieldList RT_PRTHS (AS alias = fieldExpression)? + ; + trendlineCommand : TRENDLINE (SORT sortField)? 
trendlineClause (trendlineClause)* ; @@ -1658,4 +1668,3 @@ searchableKeyWord | ROW | COL ; - diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 7ac29faf4c9..e61f6e42d15 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -75,6 +75,7 @@ import org.opensearch.sql.ast.tree.AppendCol; import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Chart; +import org.opensearch.sql.ast.tree.Convert; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -1146,6 +1147,87 @@ public UnresolvedPlan visitFillNullValueAllFields( return FillNull.ofSameValue(internalVisitExpression(ctx.replacement), List.of(), true); } + @Override + public UnresolvedPlan visitConvertCommand(OpenSearchPPLParser.ConvertCommandContext ctx) { + List conversions = new ArrayList<>(); + + for (OpenSearchPPLParser.ConvertFunctionContext funcCtx : ctx.convertFunction()) { + Let conversion = buildConversion(funcCtx); + if (conversion != null) { + conversions.add(conversion); + } + } + + return new Convert(conversions); + } + + private Let buildConversion(OpenSearchPPLParser.ConvertFunctionContext funcCtx) { + String functionName = funcCtx.functionName.getText(); + List fieldArgs = extractFieldArguments(funcCtx); + Field targetField = determineTargetField(funcCtx, fieldArgs); + + if ("none".equalsIgnoreCase(functionName)) { + return handleNoneConversion(fieldArgs, targetField); + } + + return buildFunctionConversion(functionName, fieldArgs, targetField); + } + + private List extractFieldArguments( + OpenSearchPPLParser.ConvertFunctionContext funcCtx) { + if (funcCtx.fieldList() == null) { + return new ArrayList<>(); + } + + List fieldArgs = new ArrayList<>(); + for (OpenSearchPPLParser.FieldExpressionContext 
fieldExpr : + funcCtx.fieldList().fieldExpression()) { + fieldArgs.add(internalVisitExpression(fieldExpr)); + } + return fieldArgs; + } + + private Let handleNoneConversion(List fieldArgs, Field targetField) { + if (fieldArgs.isEmpty()) { + return null; + } + + String sourceFieldName = fieldArgs.get(0).toString(); + String targetFieldName = targetField.getField().toString(); + + if (sourceFieldName.equals(targetFieldName)) { + return null; + } + + return new Let(targetField, fieldArgs.get(0)); + } + + private Let buildFunctionConversion( + String functionName, List fieldArgs, Field targetField) { + UnresolvedExpression functionCall = + AstDSL.function(functionName, fieldArgs.toArray(new UnresolvedExpression[0])); + return new Let(targetField, functionCall); + } + + private Field determineTargetField( + OpenSearchPPLParser.ConvertFunctionContext funcCtx, List fieldArgs) { + if (funcCtx.alias != null) { + String aliasName = StringUtils.unquoteIdentifier(funcCtx.alias.getText()); + return AstDSL.field(aliasName); + } + + if (!fieldArgs.isEmpty()) { + UnresolvedExpression firstArg = fieldArgs.get(0); + if (firstArg instanceof Field) { + return (Field) firstArg; + } + return AstDSL.field(firstArg.toString()); + } + + throw new IllegalArgumentException( + "Convert function must have either an alias or at least one field argument"); + } + @Override public UnresolvedPlan visitFlattenCommand(OpenSearchPPLParser.FlattenCommandContext ctx) { Field field = (Field) internalVisitExpression(ctx.fieldExpression()); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index a1e31e896dc..1739bfec635 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -68,6 +68,7 @@ import org.opensearch.sql.ast.tree.AppendPipe; import org.opensearch.sql.ast.tree.Bin; 
import org.opensearch.sql.ast.tree.Chart; +import org.opensearch.sql.ast.tree.Convert; import org.opensearch.sql.ast.tree.CountBin; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.DefaultBin; @@ -455,6 +456,40 @@ public String visitEval(Eval node, String context) { return StringUtils.format("%s | eval %s", child, expressions); } + @Override + public String visitConvert(Convert node, String context) { + String child = node.getChild().get(0).accept(this, context); + String conversions = + node.getConversions().stream() + .map( + conversion -> { + String functionName = ""; + String fields = MASK_COLUMN; + String actualSourceField = ""; + + if (conversion.getExpression() instanceof Function) { + Function func = (Function) conversion.getExpression(); + functionName = func.getFuncName().toLowerCase(Locale.ROOT); + if (!func.getFuncArgs().isEmpty() + && func.getFuncArgs().get(0) instanceof Field) { + actualSourceField = ((Field) func.getFuncArgs().get(0)).getField().toString(); + } + fields = + func.getFuncArgs().stream() + .map(arg -> MASK_COLUMN) + .collect(Collectors.joining(",")); + } + + String targetField = conversion.getVar().getField().toString(); + + String asClause = + !targetField.equals(actualSourceField) ? 
" AS " + MASK_COLUMN : ""; + return StringUtils.format("%s(%s)%s", functionName, fields, asClause); + }) + .collect(Collectors.joining(",")); + return StringUtils.format("%s | convert %s", child, conversions); + } + @Override public String visitExpand(Expand node, String context) { String child = node.getChild().getFirst().accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java new file mode 100644 index 00000000000..936b4212f4f --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLConvertTest.java @@ -0,0 +1,272 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +/** Unit tests for PPL convert command. */ +public class CalcitePPLConvertTest extends CalcitePPLAbstractTest { + + public CalcitePPLConvertTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testConvertBasic() { + String ppl = "source=EMP | convert auto(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[AUTO($5)]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, AUTO(`SAL`) `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithAlias() { + String ppl = "source=EMP | convert auto(SAL) AS salary_num"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], 
salary_num=[AUTO($5)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, AUTO(`SAL`)" + + " `salary_num`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertMultipleFunctions() { + String ppl = "source=EMP | convert auto(SAL), num(COMM)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[AUTO($5)]," + + " COMM=[NUM($6)], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, AUTO(`SAL`) `SAL`, NUM(`COMM`) `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertMultipleWithAliases() { + String ppl = "source=EMP | convert auto(SAL) AS salary, num(COMM) AS commission"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], salary=[AUTO($5)], commission=[NUM($6)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, AUTO(`SAL`)" + + " `salary`, NUM(`COMM`) `commission`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithFields() { + String ppl = "source=EMP | convert auto(SAL) AS salary_num | fields EMPNO, ENAME, salary_num"; + RelNode root = getRelNode(ppl); + // Calcite optimizes the two projections into one - this is more efficient + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], 
salary_num=[AUTO($5)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, AUTO(`SAL`) `salary_num`\n" + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertNumFunction() { + String ppl = "source=EMP | convert num(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[NUM($5)]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, NUM(`SAL`) `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertRmcommaFunction() { + String ppl = "source=EMP | convert rmcomma(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[RMCOMMA($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, RMCOMMA(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertRmunitFunction() { + String ppl = "source=EMP | convert rmunit(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[RMUNIT($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, RMUNIT(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + 
verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertMemkFunction() { + String ppl = "source=EMP | convert memk(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[MEMK($1)], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, MEMK(`ENAME`) `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`," + + " `DEPTNO`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertNoneFunction() { + String ppl = "source=EMP | convert none(ENAME)"; + RelNode root = getRelNode(ppl); + String expectedLogical = "LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = "SELECT *\n" + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithWhere() { + String ppl = "source=EMP | where DEPTNO = 10 | convert auto(SAL)"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[AUTO($5)]," + + " COMM=[$6], DEPTNO=[$7])\n" + + " LogicalFilter(condition=[=($7, 10)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, AUTO(`SAL`) `SAL`, `COMM`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "WHERE `DEPTNO` = 10"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithSort() { + String ppl = "source=EMP | convert auto(SAL) AS salary_num | sort - salary_num | head 3"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalSort(sort0=[$8], dir0=[DESC-nulls-last], fetch=[3])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], 
MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], salary_num=[AUTO($5)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, AUTO(`SAL`)" + + " `salary_num`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY 9 DESC\n" + + "LIMIT 3"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertWithStats() { + String ppl = "source=EMP | convert auto(SAL) AS salary_num | stats avg(salary_num) by DEPTNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(avg(salary_num)=[$1], DEPTNO=[$0])\n" + + " LogicalAggregate(group=[{0}], avg(salary_num)=[AVG($1)])\n" + + " LogicalProject(DEPTNO=[$7], salary_num=[AUTO($5)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT AVG(AUTO(`SAL`)) `avg(salary_num)`, `DEPTNO`\n" + + "FROM `scott`.`EMP`\n" + + "GROUP BY `DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertAllFunctions() { + String ppl = + "source=EMP | convert auto(SAL) AS sal_auto, num(COMM) AS comm_num, rmcomma(ENAME) AS" + + " name_clean, rmunit(JOB) AS job_clean, none(EMPNO) AS empno_same"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], sal_auto=[AUTO($5)], comm_num=[NUM($6)]," + + " name_clean=[RMCOMMA($1)], job_clean=[RMUNIT($2)], empno_same=[$0])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, AUTO(`SAL`)" + + " `sal_auto`, NUM(`COMM`) `comm_num`, RMCOMMA(`ENAME`) `name_clean`, RMUNIT(`JOB`)" + + " `job_clean`, `EMPNO` `empno_same`\n" + + "FROM 
`scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testConvertAutoWithMemoryField() { + String ppl = "source=EMP | convert auto(JOB) AS memory_size"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$7], memory_size=[AUTO($2)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + + String expectedSparkSql = + "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, AUTO(`JOB`)" + + " `memory_size`\n" + + "FROM `scott`.`EMP`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 2fd08988f6b..f5f9957167e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -1010,4 +1010,22 @@ public void testMvfind() { "source=t | eval result=mvfind(array('apple', 'banana', 'apricot'), 'ban.*') | fields" + " result")); } + + @Test + public void testConvertCommand() { + assertEquals( + "source=table | convert auto(identifier)", anonymize("source=t | convert auto(salary)")); + assertEquals( + "source=table | convert auto(identifier) AS identifier", + anonymize("source=t | convert auto(salary) AS salary_num")); + assertEquals( + "source=table | convert auto(identifier),num(identifier)", + anonymize("source=t | convert auto(salary), num(commission)")); + assertEquals( + "source=table | convert rmcomma(identifier),rmunit(identifier),(identifier) AS identifier", + anonymize("source=t | convert rmcomma(name), rmunit(revenue), none(id)")); + assertEquals( + "source=table | convert (identifier) AS identifier", + anonymize("source=t | convert none(empno) AS 
empno_same")); + } }