diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 24cef144c97..5a3d2b6070f 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -99,6 +99,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -703,6 +704,11 @@ public LogicalPlan visitML(ML node, AnalysisContext context) { return new LogicalML(child, node.getArguments()); } + @Override + public LogicalPlan visitTranspose(Transpose node, AnalysisContext context) { + throw getOnlyForCalciteException("Transpose"); + } + @Override public LogicalPlan visitBin(Bin node, AnalysisContext context) { throw getOnlyForCalciteException("Bin"); diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a6ef5e7547a..be5bce69032 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -86,6 +86,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.Values; import org.opensearch.sql.ast.tree.Window; @@ -282,6 +283,10 @@ public T visitReverse(Reverse node, C context) { return visitChildren(node, context); } + public T visitTranspose(Transpose node, C context) { + return visitChildren(node, context); + } + public T visitChart(Chart node, C context) { 
return visitChildren(node, context); } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java new file mode 100644 index 00000000000..2705853088b --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.*; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.common.utils.StringUtils; + +/** AST node represent Transpose operation. */ +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +public class Transpose extends UnresolvedPlan { + private final @NonNull java.util.Map arguments; + private UnresolvedPlan child; + private static final int MAX_LIMIT_TRANSPOSE = 10000; + + public Integer getMaxRows() { + Integer maxRows = 5; + if (arguments.containsKey("number") && arguments.get("number").getValue() != null) { + try { + maxRows = Integer.parseInt(arguments.get("number").getValue().toString()); + } catch (NumberFormatException e) { + // log warning and use default + maxRows = 5; + } + } + if (maxRows > MAX_LIMIT_TRANSPOSE) { + throw new IllegalArgumentException( + StringUtils.format("Maximum limit to transpose is %s", MAX_LIMIT_TRANSPOSE)); + } + return maxRows; + } + + public String getColumnName() { + String columnName = "column"; + if (arguments.containsKey("columnName") && arguments.get("columnName").getValue() != null) { + columnName = arguments.get("columnName").getValue().toString(); + } + return columnName; + } + + @Override + public Transpose attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return this.child == null ? 
ImmutableList.of() : ImmutableList.of(this.child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitTranspose(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index f63b4b1e2e0..e9efdb5eaf0 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -53,6 +53,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFamily; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; import org.apache.calcite.rex.RexInputRef; @@ -62,6 +63,7 @@ import org.apache.calcite.rex.RexWindowBounds; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlTrimFunction; import org.apache.calcite.sql.type.ArraySqlType; import org.apache.calcite.sql.type.MapSqlType; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -696,6 +698,73 @@ public RelNode visitReverse( return context.relBuilder.peek(); } + @Override + public RelNode visitTranspose( + org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) { + + visitChildren(node, context); + + int maxRows = + Optional.ofNullable(node.getMaxRows()) + .filter(r -> r > 0) + .orElseThrow(() -> new IllegalArgumentException("maxRows must be positive")); + + String columnName = node.getColumnName(); + List fieldNames = context.relBuilder.peek().getRowType().getFieldNames(); + + RelBuilder b = context.relBuilder; + RexBuilder rx = context.rexBuilder; + RelDataType varchar = rx.getTypeFactory().createSqlType(SqlTypeName.VARCHAR); + + // Step 1: ROW_NUMBER + b.projectPlus( + 
b.aggregateCall(SqlStdOperatorTable.ROW_NUMBER) + .over() + .rowsTo(RexWindowBounds.CURRENT_ROW) + .as(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)); + + // Step 2: UNPIVOT + b.unpivot( + false, + ImmutableList.of("value"), + ImmutableList.of(columnName), + fieldNames.stream() + .map( + f -> + Map.entry( + ImmutableList.of(rx.makeLiteral(f)), + ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true)))) + .collect(Collectors.toList())); + + // Step 3: Trim spaces from columnName column before pivot + + RexNode trimmedColumnName = + context.rexBuilder.makeCall( + SqlStdOperatorTable.TRIM, + context.rexBuilder.makeFlag(SqlTrimFunction.Flag.BOTH), + context.rexBuilder.makeLiteral(" "), + b.field(columnName)); + + // Step 4: PIVOT + b.pivot( + b.groupKey(trimmedColumnName), + ImmutableList.of(b.max(b.field("value"))), + ImmutableList.of(b.field(PlanUtils.ROW_NUMBER_COLUMN_FOR_TRANSPOSE)), + IntStream.rangeClosed(1, maxRows) + .mapToObj(i -> Map.entry("row " + i, ImmutableList.of((RexNode) b.literal(i)))) + .collect(Collectors.toList())); + + // Step 4: RENAME + List cleanNames = new ArrayList<>(); + cleanNames.add(columnName); + for (int i = 1; i <= maxRows; i++) { + cleanNames.add("row " + i); + } + b.rename(cleanNames); + + return b.peek(); + } + @Override public RelNode visitBin(Bin node, CalcitePlanContext context) { visitChildren(node, context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index d89c36601ef..11c0989900e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -84,6 +84,7 @@ public interface PlanUtils { String ROW_NUMBER_COLUMN_FOR_SUBSEARCH = "_row_number_subsearch_"; String ROW_NUMBER_COLUMN_FOR_STREAMSTATS = "__stream_seq__"; String ROW_NUMBER_COLUMN_FOR_CHART = "_row_number_chart_"; + String ROW_NUMBER_COLUMN_FOR_TRANSPOSE = 
"_row_number_transpose_"; static SpanUnit intervalUnitToSpanUnit(IntervalUnit unit) { return switch (unit) { diff --git a/docs/category.json b/docs/category.json index 094768d1e6f..620ebc90ade 100644 --- a/docs/category.json +++ b/docs/category.json @@ -43,6 +43,7 @@ "user/ppl/cmd/timechart.md", "user/ppl/cmd/top.md", "user/ppl/cmd/trendline.md", + "user/ppl/cmd/transpose.md", "user/ppl/cmd/where.md", "user/ppl/functions/aggregations.md", "user/ppl/functions/collection.md", diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md new file mode 100644 index 00000000000..442d4b9716d --- /dev/null +++ b/docs/user/ppl/cmd/transpose.md @@ -0,0 +1,92 @@ +# transpose + +## Description + +The `transpose` command outputs the requested number of rows as columns, effectively transposing each result row into a corresponding column of field values. + +## Syntax + +transpose [int] [column_name=&lt;string&gt;] + +* number-of-rows: optional. The number of rows to transform into columns. Default value is 5. Maximum allowed is 10000. +* column_name: optional. The name of the first column to use when transposing rows. This column holds the field names. + + +## Example 1: Transpose results + +This example shows transposing without any parameters. It transforms 5 rows into columns as default is 5. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+--------+---------+-------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | row 5 | +|----------------+-------+--------+---------+-------+-------| +| account_number | 1 | 6 | 13 | 18 | null | +| firstname | Amber | Hattie | Nanette | Dale | null | +| balance | 39225 | 5686 | 32838 | 4180 | null | +| lastname | Duke | Bond | Bates | Adams | null | ++----------------+-------+--------+---------+-------+-------+ +``` + +## Example 2: Transpose results up to a provided number of rows.
+ +This example shows transposing with the number of rows set to 4. It transforms up to 4 rows into columns. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose 4 +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+--------+---------+-------+ +| column | row 1 | row 2 | row 3 | row 4 | +|----------------+-------+--------+---------+-------| +| account_number | 1 | 6 | 13 | 18 | +| firstname | Amber | Hattie | Nanette | Dale | +| balance | 39225 | 5686 | 32838 | 4180 | +| lastname | Duke | Bond | Bates | Adams | ++----------------+-------+--------+---------+-------+ +``` + +## Example 3: Transpose results up to a provided number of rows and first column with specified column name. + +This example shows transposing with the number of rows set to 4 and a custom name for the first column. + +```ppl +source=accounts +| head 5 +| fields account_number, firstname, lastname, balance +| transpose 4 column_name='column_names' +``` + +Expected output: + +```text +fetched rows / total rows = 4/4 ++----------------+-------+--------+---------+-------+ +| column_names | row 1 | row 2 | row 3 | row 4 | +|----------------+-------+--------+---------+-------| +| account_number | 1 | 6 | 13 | 18 | +| firstname | Amber | Hattie | Nanette | Dale | +| balance | 39225 | 5686 | 32838 | 4180 | +| lastname | Duke | Bond | Bates | Adams | ++----------------+-------+--------+---------+-------+ +``` + +## Limitations + +The `transpose` command transforms up to the specified number of rows; if fewer rows are found, the remaining transposed columns are shown as null. \ No newline at end of file diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md index 30ad7159182..6728ff5be77 100644 --- a/docs/user/ppl/index.md +++ b/docs/user/ppl/index.md @@ -78,9 +78,10 @@ source=accounts | [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index.
| | [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. | | [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. | - | [addtotals command](cmd/addtotals.md) | 3.4 | stable (since 3.4) | Adds row and column values and appends a totals column and row. | - | [addcoltotals command](cmd/addcoltotals.md) | 3.4 | stable (since 3.4) | Adds column values and appends a totals row. | - +| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. | +| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. | +| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. | + - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting * **Functions** - [Aggregation Functions](functions/aggregations.md) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index c254fb47c44..2babda91636 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -103,6 +103,7 @@ CalciteTextFunctionIT.class, CalciteTopCommandIT.class, CalciteTrendlineCommandIT.class, + CalciteTransposeCommandIT.class, CalciteVisualizationFormatIT.class, CalciteWhereCommandIT.class, CalcitePPLTpchIT.class diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index cdea1738eb0..af8a22bb9f9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ 
-2021,6 +2021,18 @@ public void testaddColTotalsExplain() throws IOException { + "| addcoltotals balance age label='GrandTotal'")); } + @Test + public void testTransposeExplain() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String expected = loadExpectedPlan("explain_transpose.yaml"); + assertYamlEqualsIgnoreId( + expected, + explainQueryYaml( + "source=opensearch-sql_test_index_account" + + "| head 5 " + + "| transpose 4 column_name='column_names'")); + } + public void testComplexDedup() throws IOException { enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_dedup_complex1.yaml"); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java new file mode 100644 index 00000000000..44df58b7ab8 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java @@ -0,0 +1,173 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; +import static org.opensearch.sql.util.MatcherUtils.*; +import static org.opensearch.sql.util.MatcherUtils.rows; + +import java.io.IOException; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +public class CalciteTransposeCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.ACCOUNT); + loadIndex(Index.BANK); + } + + /** + * default test without parameters on account index + * + * @throws IOException + */ + @Test + public void testTranspose() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | head 5 | fields firstname, age, balance | transpose", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows 
plus totals row + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + + // Iterate through all data rows + verifyDataRows( + result, + rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"), + rows("balance", "39225", "5686", "32838", "4180", "16418"), + rows("age", "32", "36", "28", "33", "36")); + } + + @Test + public void testTransposeLimit() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | head 10 | fields firstname , age, balance | transpose 14", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows plus totals row + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string"), + schema("row 6", "string"), + schema("row 7", "string"), + schema("row 8", "string"), + schema("row 9", "string"), + schema("row 10", "string"), + schema("row 11", "string"), + schema("row 12", "string"), + schema("row 13", "string"), + schema("row 14", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + // Iterate through all data rows + verifyDataRows( + result, + rows( + "firstname", + "Amber", + "Hattie", + "Nanette", + "Dale", + "Elinor", + "Virginia", + "Dillard", + "Mcgee", + "Aurelia", + "Fulton", + null, + null, + null, + null), + rows( + "balance", "39225", "5686", "32838", "4180", "16418", "40540", "48086", "18612", + "34487", "29104", null, null, null, null), + rows( + "age", "32", "36", "28", "33", "36", "39", "34", "39", "37", "23", null, null, null, + null)); + } + + @Test + public void testTransposeLowerLimit() throws IOException { + var result = + executeQuery( + 
String.format( + "source=%s | head 15 | fields firstname , age, balance | transpose 5", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows plus totals row + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + // Iterate through all data rows + verifyDataRows( + result, + rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"), + rows("balance", "39225", "5686", "32838", "4180", "16418"), + rows("age", "32", "36", "28", "33", "36")); + } + + @Test + public void testTransposeColumnName() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | head 5 | fields firstname, age, balance | transpose 5" + + " column_name='column_names'", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows plus totals row + verifySchema( + result, + schema("column_names", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + + // Iterate through all data rows + verifyDataRows( + result, + rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"), + rows("balance", "39225", "5686", "32838", "4180", "16418"), + rows("age", "32", "36", "28", "33", "36")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 15f3c508b14..9783c8ae169 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java @@ -202,6 +202,17 @@ public void testAddColTotalCommand() throws 
IOException { } } + @Test + public void testTransposeCommand() throws IOException { + JSONObject result; + try { + executeQuery(String.format("search source=%s | transpose ", TEST_INDEX_BANK)); + } catch (ResponseException e) { + result = new JSONObject(TestUtils.getResponseBody(e.getResponse())); + verifyQuery(result); + } + } + private void verifyQuery(JSONObject result) throws IOException { if (isCalciteEnabled()) { assertFalse(result.getJSONArray("datarows").isEmpty()); diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java index 7ee90dc4640..9e1ffb0bf07 100644 --- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java @@ -272,6 +272,24 @@ public void testCrossClusterAddColTotals() throws IOException { result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524)); } + @Test + public void testCrossClusterTranspose() throws IOException { + // Test query_string without fields parameter on remote cluster + JSONObject result = + executeQuery( + String.format( + "search source=%s | where firstname='Hattie' or firstname ='Nanette' or" + + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |" + + " transpose 3 column_name='column_names'", + TEST_INDEX_BANK_REMOTE)); + + verifyDataRows( + result, + rows("firstname", "Nanette", "Hattie", "Dale"), + rows("balance", "32838", "5686", "4180"), + rows("age", "28", "36", "33")); + } + @Test public void testCrossClusterAppend() throws IOException { // TODO: We should enable calcite by default in CrossClusterSearchIT? 
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml new file mode 100644 index 00000000000..dbd3a80ba3d --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml @@ -0,0 +1,21 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4]) + LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) + LogicalProject(value=[CAST($19):VARCHAR NOT NULL], $f20=[TRIM(FLAG(BOTH), ' ', $18)], $f21=[=($17, 1)], $f22=[=($17, 2)], $f23=[=($17, 3)], $f24=[=($17, 4)]) + LogicalFilter(condition=[IS NOT NULL($19)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, =($18, '_id'), CAST($11):VARCHAR NOT NULL, =($18, '_index'), CAST($12):VARCHAR NOT NULL, =($18, '_score'), NUMBER_TO_STRING($13), =($18, '_maxscore'), NUMBER_TO_STRING($14), =($18, '_sort'), CAST($15):VARCHAR NOT NULL, =($18, '_routing'), CAST($16):VARCHAR NOT NULL, null:NULL)]) + 
LogicalJoin(condition=[true], joinType=[inner]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], _row_number_transpose_=[ROW_NUMBER() OVER ()]) + LogicalSort(fetch=[5]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) + LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0) FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5]) + EnumerableCalc(expr#0..18=[{inputs}], expr#19=['account_number'], expr#20=[=($t18, $t19)], expr#21=[CAST($t0):VARCHAR NOT NULL], expr#22=['firstname'], expr#23=[=($t18, $t22)], expr#24=[CAST($t1):VARCHAR NOT NULL], expr#25=['address'], expr#26=[=($t18, $t25)], expr#27=[CAST($t2):VARCHAR NOT NULL], expr#28=['balance'], expr#29=[=($t18, $t28)], expr#30=[CAST($t3):VARCHAR NOT NULL], expr#31=['gender'], expr#32=[=($t18, $t31)], expr#33=[CAST($t4):VARCHAR NOT NULL], expr#34=['city'], expr#35=[=($t18, $t34)], expr#36=[CAST($t5):VARCHAR NOT NULL], expr#37=['employer'], expr#38=[=($t18, $t37)], expr#39=[CAST($t6):VARCHAR NOT NULL], expr#40=['state'], expr#41=[=($t18, $t40)], expr#42=[CAST($t7):VARCHAR NOT NULL], expr#43=['age'], expr#44=[=($t18, $t43)], expr#45=[CAST($t8):VARCHAR NOT NULL], expr#46=['email'], expr#47=[=($t18, $t46)], expr#48=[CAST($t9):VARCHAR NOT NULL], expr#49=['lastname'], expr#50=[=($t18, $t49)], expr#51=[CAST($t10):VARCHAR NOT NULL], expr#52=['_id'], expr#53=[=($t18, $t52)], expr#54=[CAST($t11):VARCHAR NOT NULL], 
expr#55=['_index'], expr#56=[=($t18, $t55)], expr#57=[CAST($t12):VARCHAR NOT NULL], expr#58=['_score'], expr#59=[=($t18, $t58)], expr#60=[NUMBER_TO_STRING($t13)], expr#61=['_maxscore'], expr#62=[=($t18, $t61)], expr#63=[NUMBER_TO_STRING($t14)], expr#64=['_sort'], expr#65=[=($t18, $t64)], expr#66=[CAST($t15):VARCHAR NOT NULL], expr#67=['_routing'], expr#68=[=($t18, $t67)], expr#69=[CAST($t16):VARCHAR NOT NULL], expr#70=[null:NULL], expr#71=[CASE($t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t47, $t48, $t50, $t51, $t53, $t54, $t56, $t57, $t59, $t60, $t62, $t63, $t65, $t66, $t68, $t69, $t70)], expr#72=[CAST($t71):VARCHAR NOT NULL], expr#73=[FLAG(BOTH)], expr#74=[' '], expr#75=[TRIM($t73, $t74, $t18)], expr#76=[1], expr#77=[=($t17, $t76)], expr#78=[2], expr#79=[=($t17, $t78)], expr#80=[3], expr#81=[=($t17, $t80)], expr#82=[4], expr#83=[=($t17, $t82)], value=[$t72], $f20=[$t75], $f21=[$t77], $f22=[$t79], $f23=[$t81], $f24=[$t83]) + EnumerableNestedLoopJoin(condition=[CASE(SEARCH($18, Sarg['_id':CHAR(14), '_index':CHAR(14), 'account_number', 'address':CHAR(14), 'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)), true, =($18, '_score'), IS NOT NULL(NUMBER_TO_STRING($13)), =($18, '_maxscore'), IS NOT NULL(NUMBER_TO_STRING($14)), SEARCH($18, Sarg['_routing', '_sort':CHAR(8)]:CHAR(8)), true, false)], joinType=[inner]) + EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m"}, requestedTotalSize=5, pageSize=null, startFrom=0)]) + EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 
'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]]) \ No newline at end of file diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 71162e81bd8..c16d15c30b8 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -46,6 +46,7 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; +TRANSPOSE: 'TRANSPOSE'; CHART: 'CHART'; TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; @@ -70,6 +71,7 @@ LABEL: 'LABEL'; SHOW_NUMBERED_TOKEN: 'SHOW_NUMBERED_TOKEN'; AGGREGATION: 'AGGREGATION'; APPENDPIPE: 'APPENDPIPE'; +COLUMN_NAME: 'COLUMN_NAME'; //Native JOIN KEYWORDS JOIN: 'JOIN'; @@ -157,6 +159,7 @@ INPUT: 'INPUT'; OUTPUT: 'OUTPUT'; PATH: 'PATH'; + // COMPARISON FUNCTION KEYWORDS CASE: 'CASE'; ELSE: 'ELSE'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 7045796a03c..4edb0344e57 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -85,6 +85,7 @@ commands | regexCommand | chartCommand | timechartCommand + | transposeCommand | rexCommand | appendPipeCommand | replaceCommand @@ -131,6 +132,7 @@ commandName | REX | APPENDPIPE | REPLACE + | TRANSPOSE ; searchCommand @@ -328,6 +330,16 @@ timechartCommand : TIMECHART timechartParameter* statsAggTerm (BY fieldExpression)? 
timechartParameter* ; +transposeCommand + : TRANSPOSE transposeParameter* + ; + +transposeParameter + : (number = integerLiteral) + | (COLUMN_NAME EQUAL stringLiteral) + ; + + timechartParameter : LIMIT EQUAL integerLiteral | SPAN EQUAL spanLiteral @@ -1657,5 +1669,6 @@ searchableKeyWord | FIELDNAME | ROW | COL + | COLUMN_NAME ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3f4f3049365..0c93998f768 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -111,6 +111,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Window; @@ -736,6 +737,13 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont return new Reverse(); } + /** Transpose command. */ + @Override + public UnresolvedPlan visitTransposeCommand(OpenSearchPPLParser.TransposeCommandContext ctx) { + java.util.Map arguments = ArgumentFactory.getArgumentList(ctx); + return new Transpose(arguments); + } + /** Chart command. 
*/ @Override public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) { diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java index ed76b29b77a..72090e2f069 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java @@ -8,7 +8,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Optional; import org.antlr.v4.runtime.ParserRuleContext; import org.opensearch.sql.ast.dsl.AstDSL; @@ -308,6 +310,30 @@ public static List getArgumentList( return arguments; } + public static Map getArgumentList( + OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) { + Map arguments = new HashMap<>(); + for (OpenSearchPPLParser.TransposeParameterContext ctx : + transposeCommandContext.transposeParameter()) { + + if (ctx.COLUMN_NAME() != null) { + if (ctx.stringLiteral() == null) { + throw new IllegalArgumentException("COLUMN_NAME requires a string literal value"); + } + Literal columnName = getArgumentValue(ctx.stringLiteral()); + arguments.put("columnName", new Argument("columnName", columnName)); + } else if (ctx.number != null) { + + arguments.put("number", new Argument("number", getArgumentValue(ctx.number))); + } else { + throw new IllegalArgumentException( + String.format( + "A parameter of transpose must be a int limit, column_name , got %s", ctx)); + } + } + return arguments; + } + /** * Get list of {@link Argument}. 
* diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 7d04ad8e6ad..41656061611 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -100,6 +100,7 @@ import org.opensearch.sql.ast.tree.StreamWindow; import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; +import org.opensearch.sql.ast.tree.Transpose; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.ast.tree.Values; @@ -636,6 +637,27 @@ public String visitTrendline(Trendline node, String context) { return StringUtils.format("%s | trendline %s", child, computations); } + @Override + public String visitTranspose(Transpose node, String context) { + if (node.getChild().isEmpty()) { + return "source=*** | transpose"; + } + String child = node.getChild().get(0).accept(this, context); + StringBuilder anonymized = new StringBuilder(StringUtils.format("%s | transpose", child)); + java.util.Map arguments = node.getArguments(); + + if (arguments.containsKey("number")) { + Argument numberArg = arguments.get("number"); + if (numberArg != null) { + anonymized.append(StringUtils.format(" %s", numberArg.getValue())); + } + } + if (arguments.containsKey("columnName")) { + anonymized.append(StringUtils.format(" %s=***", "column_name")); + } + return anonymized.toString(); + } + @Override public String visitAppendCol(AppendCol node, String context) { String child = node.getChild().get(0).accept(this, context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java new file mode 100644 index 00000000000..b6b60c530e7 --- /dev/null +++ 
b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTransposeTest.java @@ -0,0 +1,256 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.test.CalciteAssert; +import org.junit.Test; + +public class CalcitePPLTransposeTest extends CalcitePPLAbstractTest { + + public CalcitePPLTransposeTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testSimpleCountWithTranspose() { + String ppl = "source=EMP | stats count() as c|transpose"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4], row 5=[$5])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + + " 5_null=[MAX($0) FILTER $6])\n" + + " LogicalProject(value=[CAST($3):VARCHAR NOT NULL], $f4=[TRIM(FLAG(BOTH), ' '," + + " $2)], $f5=[=($1, 1)], $f6=[=($1, 2)], $f7=[=($1, 3)], $f8=[=($1, 4)], $f9=[=($1," + + " 5)])\n" + + " LogicalFilter(condition=[IS NOT NULL($3)])\n" + + " LogicalProject(c=[$0], _row_number_transpose_=[$1], column=[$2]," + + " value=[CASE(=($2, 'c'), CAST($0):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(c=[$0], _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], c=[COUNT()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'c' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "column=c; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, 
MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 5) `row 5`\n" + + "FROM (SELECT `t0`.`c`, `t0`.`_row_number_transpose_`, `t1`.`column`, CASE WHEN" + + " `t1`.`column` = 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t0`\n" + + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" + + "WHERE `t2`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column`)"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleAggregatesWithAliasesTranspose() { + String ppl = + "source=EMP | stats avg(SAL) as avg_sal, max(SAL) as max_sal, min(SAL) as min_sal, count()" + + " as cnt|transpose "; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3], row 4=[$4], row 5=[$5])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4], row 4_null=[MAX($0) FILTER $5], row" + + " 5_null=[MAX($0) FILTER $6])\n" + + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)], $f11=[=($4, 4)], $f12=[=($4," + + " 5)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'avg_sal')," + + " NUMBER_TO_STRING($0), =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal')," + + " NUMBER_TO_STRING($2), =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], 
joinType=[inner])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)], max_sal=[MAX($0)]," + + " min_sal=[MIN($0)], cnt=[COUNT()])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" + + " 'cnt' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=max_sal; row 1=5000.00; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=cnt; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=min_sal; row 1=800.00; row 2=null; row 3=null; row 4=null; row 5=null\n"; + + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 4) `row 4`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 5) `row 5`\n" + + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + + " `t1`.`_row_number_transpose_`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal'" + + " THEN NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" + + " STRING) ELSE NULL END `value`\n" + + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + + " COUNT(*) `cnt`, ROW_NUMBER() OVER () 
`_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t1`\n" + + "CROSS JOIN (VALUES ('avg_sal'),\n" + + "('max_sal'),\n" + + "('min_sal'),\n" + + "('cnt')) `t2` (`column`)) `t3`\n" + + "WHERE `t3`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column`)"; + + /* + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 5`\n" + + "FROM (SELECT `t1`.`avg_sal`, `t1`.`max_sal`, `t1`.`min_sal`, `t1`.`cnt`," + + " `t1`.`__row_id__`, `t2`.`column`, CASE WHEN `t2`.`column` = 'avg_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`avg_sal`) WHEN `t2`.`column` = 'max_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`max_sal`) WHEN `t2`.`column` = 'min_sal' THEN" + + " NUMBER_TO_STRING(`t1`.`min_sal`) WHEN `t2`.`column` = 'cnt' THEN CAST(`t1`.`cnt` AS" + + " STRING) ELSE NULL END `value`\n" + + "FROM (SELECT AVG(`SAL`) `avg_sal`, MAX(`SAL`) `max_sal`, MIN(`SAL`) `min_sal`," + + " COUNT(*) `cnt`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t1`\n" + + "CROSS JOIN (VALUES ('avg_sal'),\n" + + "('max_sal'),\n" + + "('min_sal'),\n" + + "('cnt')) `t2` (`column`)) `t3`\n" + + "WHERE `t3`.`value` IS NOT NULL\n" + + "GROUP BY `column`"; + + */ + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTransposeWithLimit() { + String ppl = "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" + 
+ " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + + " _row_number_transpose_=[$4], column=[$5], value=[CASE(=($5, 'ENAME')," + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " }]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "column=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + + "column=COMM; row 1=null; row 2=300.00; row 3=500.00\n" + + "column=JOB; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n" + + "column=SAL; row 1=800.00; row 2=1600.00; row 3=1250.00\n"; + + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT TRIM(`column`) `column`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + + " `t`.`_row_number_transpose_`, `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN" + + " CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` = 'JOB' THEN CAST(`t`.`JOB` AS" + + " STRING) WHEN `t0`.`column` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END" + + " `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" 
+ + " `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "CROSS JOIN (VALUES ('ENAME'),\n" + + "('COMM'),\n" + + "('JOB'),\n" + + "('SAL')) `t0` (`column`)) `t1`\n" + + "WHERE `t1`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column`)"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testTransposeWithLimitColumnName() { + String ppl = + "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3 column_name='column_names'"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(column_names=[$0], row 1=[$1], row 2=[$2], row 3=[$3])\n" + + " LogicalAggregate(group=[{1}], row 1_null=[MAX($0) FILTER $2], row 2_null=[MAX($0)" + + " FILTER $3], row 3_null=[MAX($0) FILTER $4])\n" + + " LogicalProject(value=[CAST($6):VARCHAR NOT NULL], $f7=[TRIM(FLAG(BOTH), ' '," + + " $5)], $f8=[=($4, 1)], $f9=[=($4, 2)], $f10=[=($4, 3)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3]," + + " _row_number_transpose_=[$4], column_names=[$5], value=[CASE(=($5, 'ENAME')," + + " CAST($0):VARCHAR NOT NULL, =($5, 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB')," + + " CAST($2):VARCHAR NOT NULL, =($5, 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5]," + + " _row_number_transpose_=[ROW_NUMBER() OVER ()])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL'" + + " }]])\n"; + + verifyLogical(root, expectedLogical); + String expectedResult = + "column_names=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n" + + "column_names=COMM; row 1=null; row 2=300.00; row 3=500.00\n" + + "column_names=JOB; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n" + + "column_names=SAL; row 1=800.00; row 2=1600.00; row 3=1250.00\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT 
TRIM(`column_names`) `column_names`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 1) `row 1`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 2) `row 2`, MAX(CAST(`value` AS STRING)) FILTER (WHERE" + + " `_row_number_transpose_` = 3) `row 3`\n" + + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`," + + " `t`.`_row_number_transpose_`, `t0`.`column_names`, CASE WHEN `t0`.`column_names` =" + + " 'ENAME' THEN CAST(`t`.`ENAME` AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN" + + " NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB`" + + " AS STRING) WHEN `t0`.`column_names` = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE" + + " NULL END `value`\n" + + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER ()" + + " `_row_number_transpose_`\n" + + "FROM `scott`.`EMP`) `t`\n" + + "CROSS JOIN (VALUES ('ENAME'),\n" + + "('COMM'),\n" + + "('JOB'),\n" + + "('SAL')) `t0` (`column_names`)) `t1`\n" + + "WHERE `t1`.`value` IS NOT NULL\n" + + "GROUP BY TRIM(`column_names`)"; + + verifyPPLToSparkSQL(root, expectedSparkSql); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 2fd08988f6b..e80b12caba8 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -247,6 +247,13 @@ public void testDedupCommand() { anonymize("source=t | dedup f1, f2")); } + @Test + public void testTransposeCommand() { + assertEquals( + "source=table | transpose 5 column_name=***", + anonymize("source=t | transpose 5 column_name='column_names'")); + } + @Test public void testTrendlineCommand() { assertEquals(