diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
index 24cef144c97..5a3d2b6070f 100644
--- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
+++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
@@ -99,6 +99,7 @@
 import org.opensearch.sql.ast.tree.StreamWindow;
 import org.opensearch.sql.ast.tree.SubqueryAlias;
 import org.opensearch.sql.ast.tree.TableFunction;
+import org.opensearch.sql.ast.tree.Transpose;
 import org.opensearch.sql.ast.tree.Trendline;
 import org.opensearch.sql.ast.tree.UnresolvedPlan;
 import org.opensearch.sql.ast.tree.Values;
@@ -703,6 +704,11 @@ public LogicalPlan visitML(ML node, AnalysisContext context) {
     return new LogicalML(child, node.getArguments());
   }
 
+  @Override
+  public LogicalPlan visitTranspose(Transpose node, AnalysisContext context) {
+    throw getOnlyForCalciteException("Transpose");
+  }
+
   @Override
   public LogicalPlan visitBin(Bin node, AnalysisContext context) {
     throw getOnlyForCalciteException("Bin");
diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
index a6ef5e7547a..be5bce69032 100644
--- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
+++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
@@ -86,6 +86,7 @@
 import org.opensearch.sql.ast.tree.StreamWindow;
 import org.opensearch.sql.ast.tree.SubqueryAlias;
 import org.opensearch.sql.ast.tree.TableFunction;
+import org.opensearch.sql.ast.tree.Transpose;
 import org.opensearch.sql.ast.tree.Trendline;
 import org.opensearch.sql.ast.tree.Values;
 import org.opensearch.sql.ast.tree.Window;
@@ -282,6 +283,10 @@ public T visitReverse(Reverse node, C context) {
     return visitChildren(node, context);
   }
 
+  public T visitTranspose(Transpose node, C context) {
+    return visitChildren(node, context);
+  }
+
   public T visitChart(Chart node, C context) {
     return visitChildren(node, context);
   }
diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java
new file mode 100644
index 00000000000..221eb73e9de
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/ast/tree/Transpose.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ast.tree;
+
+import com.google.common.collect.ImmutableList;
+import java.util.List;
+import lombok.*;
+import org.opensearch.sql.ast.AbstractNodeVisitor;
+import org.opensearch.sql.ast.expression.Argument;
+import org.opensearch.sql.common.utils.StringUtils;
+
+/** AST node representing the Transpose operation. */
+@Getter
+@Setter
+@ToString
+@EqualsAndHashCode(callSuper = false)
+@RequiredArgsConstructor
+public class Transpose extends UnresolvedPlan {
+  private final @NonNull java.util.Map<String, Argument> arguments;
+  private UnresolvedPlan child;
+  private static final int MAX_LIMIT = 1000;
+
+  public Integer getMaxRows() {
+    Integer maxRows = 5;
+    if (arguments.containsKey("number") && arguments.get("number").getValue() != null) {
+      try {
+        maxRows = Integer.parseInt(arguments.get("number").getValue().toString());
+      } catch (NumberFormatException e) {
+        // fall back to the default when the value is not a valid integer
+        maxRows = 5;
+      }
+    }
+    if (maxRows > MAX_LIMIT) {
+      throw new IllegalArgumentException(
+          StringUtils.format("Maximum limit to transpose is %s", MAX_LIMIT));
+    }
+    return maxRows;
+  }
+
+  public String getColumnName() {
+    String columnName = "column";
+    if (arguments.containsKey("columnName") && arguments.get("columnName").getValue() != null) {
+      columnName = arguments.get("columnName").getValue().toString();
+    }
+    return columnName;
+  }
+
+  @Override
+  public Transpose attach(UnresolvedPlan child) {
+    this.child = child;
+    return this;
+  }
+
+  @Override
+  public List<UnresolvedPlan> getChild() {
+    return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
+  }
+
+  @Override
+  public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
+    return nodeVisitor.visitTranspose(this, context);
+  }
+}
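Editorial note on the new AST node: the sketch below is not part of the diff; it only illustrates how the argument map drives `getMaxRows()` and `getColumnName()`. The demo class name and the direct `Argument`/`Literal` construction are assumptions that mirror how `ArgumentFactory.getArgumentList(TransposeCommandContext)` later in this change populates the map.

```java
// Illustrative sketch only (not part of this PR). Assumes Argument(String, Literal)
// and Literal(Object, DataType), the same constructors ArgumentFactory relies on below.
import java.util.HashMap;
import java.util.Map;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.tree.Transpose;

public class TransposeArgumentsDemo {
  public static void main(String[] args) {
    // No arguments: getMaxRows() falls back to 5, getColumnName() to "column".
    Transpose defaults = new Transpose(new HashMap<>());
    System.out.println(defaults.getMaxRows() + " " + defaults.getColumnName());

    // Arguments as the parser would record them for:
    //   ... | transpose 4 column_name='column_names'
    Map<String, Argument> arguments = new HashMap<>();
    arguments.put("number", new Argument("number", new Literal(4, DataType.INTEGER)));
    arguments.put(
        "columnName", new Argument("columnName", new Literal("column_names", DataType.STRING)));
    Transpose transpose = new Transpose(arguments);
    // Expected to print "4 column_names" given the parsing in getMaxRows()/getColumnName().
    System.out.println(transpose.getMaxRows() + " " + transpose.getColumnName());
  }
}
```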
diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
index f63b4b1e2e0..736dad91114 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
@@ -29,6 +29,7 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Streams;
+import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
@@ -696,6 +697,110 @@ public RelNode visitReverse(
     return context.relBuilder.peek();
   }
 
+  @Override
+  public RelNode visitTranspose(
+      org.opensearch.sql.ast.tree.Transpose node, CalcitePlanContext context) {
+    visitChildren(node, context);
+    Integer maxRows = node.getMaxRows();
+    if (maxRows == null || maxRows <= 0) {
+      throw new IllegalArgumentException("maxRows must be a positive integer");
+    }
+    String columnName = node.getColumnName();
+    // Get the current schema to transpose
+    RelNode currentNode = context.relBuilder.peek();
+    List<String> fieldNames = currentNode.getRowType().getFieldNames();
+    List<RelDataTypeField> fields = currentNode.getRowType().getFieldList();
+    if (fieldNames.isEmpty()) {
+      return currentNode;
+    }
+
+    // Add row numbers to identify each row uniquely
+    RexNode rowNumber =
+        context
+            .relBuilder
+            .aggregateCall(SqlStdOperatorTable.ROW_NUMBER)
+            .over()
+            .rowsTo(RexWindowBounds.CURRENT_ROW)
+            .as("__row_id__");
+    context.relBuilder.projectPlus(rowNumber);
+
+    // Unpivot the data - convert columns to rows
+    // Each field becomes a row with: row_id, column, value
+    List<String> measureColumns = ImmutableList.of("value");
+    List<String> axisColumns = ImmutableList.of(columnName);
+
+    // Create the unpivot value mappings
+    List<Map.Entry<List<RexLiteral>, List<RexNode>>> valueMappings = new ArrayList<>();
+    RelDataType varcharType =
+        context.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR);
+
+    for (String fieldName : fieldNames) {
+      if (fieldName.equals("__row_id__")) {
+        continue; // Skip the row number column
+      }
+
+      // Create the axis value (column name as literal)
+      RexLiteral columnNameLiteral = context.rexBuilder.makeLiteral(fieldName);
+      List<RexLiteral> axisValues = ImmutableList.of(columnNameLiteral);
+
+      // Create the measure value (field expression cast to VARCHAR)
+      RexNode fieldValue = context.relBuilder.field(fieldName);
+      RexNode castValue = context.rexBuilder.makeCast(varcharType, fieldValue, true);
+      List<RexNode> measureValues = ImmutableList.of(castValue);
+
+      // Create the mapping entry
+      valueMappings.add(new AbstractMap.SimpleEntry<>(axisValues, measureValues));
+    }
+
+    // Apply the unpivot operation
+    context.relBuilder.unpivot(
+        false, // includeNulls = false
+        measureColumns, // measure column names: ["value"]
+        axisColumns, // axis column names: ["column"]
+        valueMappings // field mappings
+        );
+
+    // Pivot the data to transpose rows as columns
+    // Pivot on __row_id__ with column as the grouping key
+    // This creates: column, row1, row2, row3, ...
+
+    // Create conditional aggregations for each row position
+    // We'll use ROW_NUMBER to determine the row positions dynamically
+    RexNode rowPos =
+        context
+            .relBuilder
+            .aggregateCall(SqlStdOperatorTable.ROW_NUMBER)
+            .over()
+            .partitionBy(context.relBuilder.field(columnName))
+            .orderBy(context.relBuilder.field("__row_id__"))
+            .rowsTo(RexWindowBounds.CURRENT_ROW)
+            .as("__row_pos__");
+    context.relBuilder.projectPlus(rowPos);
+
+    // Create aggregation calls for each possible row position
+    List<AggCall> pivotAggCalls = new ArrayList<>();
+
+    for (int i = 1; i <= maxRows; i++) {
+      // Create CASE WHEN __row_id__ = i THEN value END for each row position
+      RexNode caseExpr =
+          context.relBuilder.call(
+              SqlStdOperatorTable.CASE,
+              context.relBuilder.equals(
+                  context.relBuilder.field("__row_id__"), context.relBuilder.literal(i)),
+              context.relBuilder.field("value"),
+              context.relBuilder.literal(null));
+
+      AggCall maxCase = context.relBuilder.max(caseExpr).as("row " + i);
+      pivotAggCalls.add(maxCase);
+    }
+
+    // Group by column and apply the conditional aggregations
+    context.relBuilder.aggregate(
+        context.relBuilder.groupKey(context.relBuilder.field(columnName)), pivotAggCalls);
+
+    return context.relBuilder.peek();
+  }
+
   @Override
   public RelNode visitBin(Bin node, CalcitePlanContext context) {
     visitChildren(node, context);
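The visitor builds the transpose in three relational steps: tag every input row with `ROW_NUMBER()` as `__row_id__`, unpivot each field into `(__row_id__, column, value)` triples, then pivot back with one `MAX(CASE WHEN __row_id__ = i THEN value END)` aggregate per requested row position. As a minimal sketch of that data flow, here is the same algorithm on plain in-memory rows (illustrative only; no Calcite involved):

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Mirrors the visitor's strategy on in-memory data: for each field, emit one
// output row whose "row i" cells hold the i-th source row's value (or null).
public class TransposeSketch {
  public static List<Map<String, String>> transpose(
      List<Map<String, Object>> rows, List<String> fieldNames, int maxRows, String columnName) {
    List<Map<String, String>> result = new ArrayList<>();
    for (String field : fieldNames) {
      Map<String, String> out = new LinkedHashMap<>();
      out.put(columnName, field); // the axis column holds the original field name
      for (int i = 1; i <= maxRows; i++) {
        // Equivalent of MAX(CASE WHEN __row_id__ = i THEN CAST(value AS VARCHAR) END)
        Object value = i <= rows.size() ? rows.get(i - 1).get(field) : null;
        out.put("row " + i, value == null ? null : String.valueOf(value));
      }
      result.add(out);
    }
    return result;
  }

  public static void main(String[] args) {
    List<Map<String, Object>> rows =
        List.of(
            Map.of("firstname", "Amber", "age", 32),
            Map.of("firstname", "Hattie", "age", 36));
    transpose(rows, List.of("firstname", "age"), 3, "column").forEach(System.out::println);
    // {column=firstname, row 1=Amber, row 2=Hattie, row 3=null}
    // {column=age, row 1=32, row 2=36, row 3=null}
  }
}
```

Running it prints one output row per input field, matching the `column` / `row N` shape that the documentation and integration tests below assert.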
diff --git a/docs/category.json b/docs/category.json
index 094768d1e6f..620ebc90ade 100644
--- a/docs/category.json
+++ b/docs/category.json
@@ -43,6 +43,7 @@
     "user/ppl/cmd/timechart.md",
     "user/ppl/cmd/top.md",
+    "user/ppl/cmd/transpose.md",
     "user/ppl/cmd/trendline.md",
     "user/ppl/cmd/where.md",
     "user/ppl/functions/aggregations.md",
     "user/ppl/functions/collection.md",
diff --git a/docs/user/ppl/cmd/transpose.md b/docs/user/ppl/cmd/transpose.md
new file mode 100644
index 00000000000..e177254ed12
--- /dev/null
+++ b/docs/user/ppl/cmd/transpose.md
@@ -0,0 +1,92 @@
+# transpose
+
+## Description
+
+The `transpose` command outputs the requested number of rows as columns, effectively transposing each result row into a corresponding column of field values.
+
+## Syntax
+
+transpose [int] [column_name=<string>]
+
+* number-of-rows: optional. The number of rows to transform into columns. Default value is 5. Maximum allowed is 1000.
+* column_name: optional. The name of the first column to use when transposing rows. This column holds the field names.
+
+
+## Example 1: Transpose results
+
+This example shows transposing without any parameters, so the default limit of 5 rows applies. The accounts index holds only four documents, which is why the `row 5` column is null.
+
+```ppl
+source=accounts
+| head 5
+| fields account_number, firstname, lastname, balance
+| transpose
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-------+--------+---------+-------+-------+
+| column         | row 1 | row 2  | row 3   | row 4 | row 5 |
+|----------------+-------+--------+---------+-------+-------|
+| account_number | 1     | 6      | 13      | 18    | null  |
+| firstname      | Amber | Hattie | Nanette | Dale  | null  |
+| balance        | 39225 | 5686   | 32838   | 4180  | null  |
+| lastname       | Duke  | Bond   | Bates   | Adams | null  |
++----------------+-------+--------+---------+-------+-------+
+```
+
+## Example 2: Transpose results up to a provided number of rows
+
+This example transposes with an explicit limit of 4, transforming up to 4 rows into columns.
+
+```ppl
+source=accounts
+| head 5
+| fields account_number, firstname, lastname, balance
+| transpose 4
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-------+--------+---------+-------+
+| column         | row 1 | row 2  | row 3   | row 4 |
+|----------------+-------+--------+---------+-------|
+| account_number | 1     | 6      | 13      | 18    |
+| firstname      | Amber | Hattie | Nanette | Dale  |
+| balance        | 39225 | 5686   | 32838   | 4180  |
+| lastname       | Duke  | Bond   | Bates   | Adams |
++----------------+-------+--------+---------+-------+
+```
+
+## Example 3: Transpose results with a custom first column name
+
+This example sets both the row limit and the name of the first column, transforming up to 4 rows into columns under a first column named `column_names`.
+
+```ppl
+source=accounts
+| head 5
+| fields account_number, firstname, lastname, balance
+| transpose 4 column_name='column_names'
+```
+
+Expected output:
+
+```text
+fetched rows / total rows = 4/4
++----------------+-------+--------+---------+-------+
+| column_names   | row 1 | row 2  | row 3   | row 4 |
+|----------------+-------+--------+---------+-------|
+| account_number | 1     | 6      | 13      | 18    |
+| firstname      | Amber | Hattie | Nanette | Dale  |
+| balance        | 39225 | 5686   | 32838   | 4180  |
+| lastname       | Duke  | Bond   | Bates   | Adams |
++----------------+-------+--------+---------+-------+
+```
+
+## Limitations
+
+The `transpose` command transforms up to the specified number of rows; if fewer rows are available, the remaining `row N` columns are filled with null values.
\ No newline at end of file
diff --git a/docs/user/ppl/index.md b/docs/user/ppl/index.md
index 30ad7159182..6728ff5be77 100644
--- a/docs/user/ppl/index.md
+++ b/docs/user/ppl/index.md
@@ -78,9 +78,10 @@ source=accounts
 | [describe command](cmd/describe.md) | 2.1 | stable (since 2.1) | Query the metadata of an index. |
 | [explain command](cmd/explain.md) | 3.1 | stable (since 3.1) | Explain the plan of query. |
 | [show datasources command](cmd/showdatasources.md) | 2.4 | stable (since 2.4) | Query datasources configured in the PPL engine. |
- | [addtotals command](cmd/addtotals.md) | 3.4 | stable (since 3.4) | Adds row and column values and appends a totals column and row. |
- | [addcoltotals command](cmd/addcoltotals.md) | 3.4 | stable (since 3.4) | Adds column values and appends a totals row. |
-
+| [addtotals command](cmd/addtotals.md) | 3.5 | stable (since 3.5) | Adds row and column values and appends a totals column and row. |
+| [addcoltotals command](cmd/addcoltotals.md) | 3.5 | stable (since 3.5) | Adds column values and appends a totals row. |
+| [transpose command](cmd/transpose.md) | 3.5 | stable (since 3.5) | Transpose rows to columns. |
+
 - [Syntax](cmd/syntax.md) - PPL query structure and command syntax formatting
 * **Functions**
   - [Aggregation Functions](functions/aggregations.md)
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
index c254fb47c44..2babda91636 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
@@ -103,6 +103,7 @@
   CalciteTextFunctionIT.class,
   CalciteTopCommandIT.class,
+  CalciteTransposeCommandIT.class,
   CalciteTrendlineCommandIT.class,
   CalciteVisualizationFormatIT.class,
   CalciteWhereCommandIT.class,
   CalcitePPLTpchIT.class
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
index cdea1738eb0..af8a22bb9f9 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
@@ -2021,6 +2021,18 @@ public void testaddColTotalsExplain() throws IOException {
             + "| addcoltotals balance age label='GrandTotal'"));
   }
 
+  @Test
+  public void testTransposeExplain() throws IOException {
+    enabledOnlyWhenPushdownIsEnabled();
+    String expected = loadExpectedPlan("explain_transpose.yaml");
+    assertYamlEqualsIgnoreId(
+        expected,
+        explainQueryYaml(
+            "source=opensearch-sql_test_index_account"
+                + "| head 5 "
+                + "| transpose 4 column_name='column_names'"));
+  }
+
   public void testComplexDedup() throws IOException {
     enabledOnlyWhenPushdownIsEnabled();
     String expected = loadExpectedPlan("explain_dedup_complex1.yaml");
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
new file mode 100644
index 00000000000..1b4f8cf03d0
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteTransposeCommandIT.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.remote;
+
+import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.schema;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+import static org.opensearch.sql.util.MatcherUtils.verifySchema;
+
+import java.io.IOException;
+import org.junit.Test;
+import org.opensearch.sql.ppl.PPLIntegTestCase;
+
+public class CalciteTransposeCommandIT extends PPLIntegTestCase {
+
+  @Override
+  public void init() throws Exception {
+    super.init();
+    enableCalcite();
+    loadIndex(Index.ACCOUNT);
+    loadIndex(Index.BANK);
+  }
+
+  /**
+   * Default test without parameters on the account index.
+   *
+   * @throws IOException on request failure
+   */
+  @Test
+  public void testTranspose() throws IOException {
+    var result =
+        executeQuery(
+            String.format(
+                "source=%s | head 5 | fields firstname, age, balance | transpose",
+                TEST_INDEX_ACCOUNT));
+
+    // Verify the transposed schema: the field-name column plus one column per row
+    verifySchema(
+        result,
+        schema("column", "string"),
+        schema("row 1", "string"),
+        schema("row 2", "string"),
+        schema("row 3", "string"),
+        schema("row 4", "string"),
+        schema("row 5", "string"));
+
+    // Each selected field becomes one transposed data row
+    verifyDataRows(
+        result,
+        rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"),
+        rows("balance  ", "39225", "5686", "32838", "4180", "16418"),
+        rows("age      ", "32", "36", "28", "33", "36"));
+  }
+
"Amber", "Hattie", "Nanette", "Dale", "Elinor"), + rows("balance ", "39225", "5686", "32838", "4180", "16418"), + rows("age ", "32", "36", "28", "33", "36")); + } + + @Test + public void testTransposeLimit() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | head 10 | fields firstname , age, balance | transpose 14", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows plus totals row + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string"), + schema("row 6", "string"), + schema("row 7", "string"), + schema("row 8", "string"), + schema("row 9", "string"), + schema("row 10", "string"), + schema("row 11", "string"), + schema("row 12", "string"), + schema("row 13", "string"), + schema("row 14", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + // Iterate through all data rows + verifyDataRows( + result, + rows( + "firstname", + "Amber", + "Hattie", + "Nanette", + "Dale", + "Elinor", + "Virginia", + "Dillard", + "Mcgee", + "Aurelia", + "Fulton", + null, + null, + null, + null), + rows( + "balance ", + "39225", + "5686", + "32838", + "4180", + "16418", + "40540", + "48086", + "18612", + "34487", + "29104", + null, + null, + null, + null), + rows( + "age ", + "32", + "36", + "28", + "33", + "36", + "39", + "34", + "39", + "37", + "23", + null, + null, + null, + null)); + } + + @Test + public void testTransposeLowerLimit() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | head 15 | fields firstname , age, balance | transpose 5", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows plus totals row + verifySchema( + result, + schema("column", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + // Iterate through all data rows + verifyDataRows( + result, + rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"), + rows("balance ", "39225", "5686", "32838", "4180", "16418"), + rows("age ", "32", "36", "28", "33", "36")); + } + + @Test + public void testTransposeColumnName() throws IOException { + var result = + executeQuery( + String.format( + "source=%s | head 5 | fields firstname, age, balance | transpose 5" + + " column_name='column_names'", + TEST_INDEX_ACCOUNT)); + + // Verify that we get original rows plus totals row + verifySchema( + result, + schema("column_names", "string"), + schema("row 1", "string"), + schema("row 2", "string"), + schema("row 3", "string"), + schema("row 4", "string"), + schema("row 5", "string")); + + // Should have original data plus one totals row + var dataRows = result.getJSONArray("datarows"); + + // Iterate through all data rows + verifyDataRows( + result, + rows("firstname", "Amber", "Hattie", "Nanette", "Dale", "Elinor"), + rows("balance ", "39225", "5686", "32838", "4180", "16418"), + rows("age ", "32", "36", "28", "33", "36")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java index 15f3c508b14..9783c8ae169 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java +++ 
diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java
index 15f3c508b14..9783c8ae169 100644
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/NewAddedCommandsIT.java
@@ -202,6 +202,17 @@ public void testAddColTotalCommand() throws IOException {
     }
   }
 
+  @Test
+  public void testTransposeCommand() throws IOException {
+    JSONObject result;
+    try {
+      executeQuery(String.format("search source=%s | transpose ", TEST_INDEX_BANK));
+    } catch (ResponseException e) {
+      result = new JSONObject(TestUtils.getResponseBody(e.getResponse()));
+      verifyQuery(result);
+    }
+  }
+
   private void verifyQuery(JSONObject result) throws IOException {
     if (isCalciteEnabled()) {
       assertFalse(result.getJSONArray("datarows").isEmpty());
diff --git a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java
index 7ee90dc4640..6bd34297b9b 100644
--- a/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/security/CrossClusterSearchIT.java
@@ -272,6 +272,23 @@ public void testCrossClusterAddColTotals() throws IOException {
         result, rows("Hattie", 36, 5686), rows("Nanette", 28, 32838), rows(null, 64, 38524));
   }
 
+  @Test
+  public void testCrossClusterTranspose() throws IOException {
+    // Run transpose with a row limit and custom column name against the remote cluster
+    JSONObject result =
+        executeQuery(
+            String.format(
+                "search source=%s | where firstname='Hattie' or firstname ='Nanette' or"
+                    + " firstname='Dale'|sort firstname desc |fields firstname,age,balance |"
+                    + " transpose 3 column_name='column_names'",
+                TEST_INDEX_BANK_REMOTE));
+    verifyDataRows(
+        result,
+        rows("firstname", "Nanette", "Hattie", "Dale"),
+        rows("balance  ", "32838", "5686", "4180"),
+        rows("age      ", "28", "36", "33"));
+  }
+
   @Test
   public void testCrossClusterAppend() throws IOException {
     // TODO: We should enable calcite by default in CrossClusterSearchIT?
diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml
new file mode 100644
index 00000000000..98a833a4bd5
--- /dev/null
+++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_transpose.yaml
@@ -0,0 +1,20 @@
+calcite:
+  logical: |
+    LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
+      LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row 4=[MAX($4)])
+        LogicalProject(column_names=[$18], $f21=[CASE(=($17, 1), CAST($19):VARCHAR NOT NULL, null:NULL)], $f22=[CASE(=($17, 2), CAST($19):VARCHAR NOT NULL, null:NULL)], $f23=[CASE(=($17, 3), CAST($19):VARCHAR NOT NULL, null:NULL)], $f24=[CASE(=($17, 4), CAST($19):VARCHAR NOT NULL, null:NULL)])
+          LogicalFilter(condition=[IS NOT NULL($19)])
+            LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __row_id__=[$17], column_names=[$18], value=[CASE(=($18, 'account_number'), CAST($0):VARCHAR NOT NULL, =($18, 'firstname'), CAST($1):VARCHAR NOT NULL, =($18, 'address'), CAST($2):VARCHAR NOT NULL, =($18, 'balance'), CAST($3):VARCHAR NOT NULL, =($18, 'gender'), CAST($4):VARCHAR NOT NULL, =($18, 'city'), CAST($5):VARCHAR NOT NULL, =($18, 'employer'), CAST($6):VARCHAR NOT NULL, =($18, 'state'), CAST($7):VARCHAR NOT NULL, =($18, 'age'), CAST($8):VARCHAR NOT NULL, =($18, 'email'), CAST($9):VARCHAR NOT NULL, =($18, 'lastname'), CAST($10):VARCHAR NOT NULL, =($18, '_id'), CAST($11):VARCHAR NOT NULL, =($18, '_index'), CAST($12):VARCHAR NOT NULL, =($18, '_score'), NUMBER_TO_STRING($13), =($18, '_maxscore'), NUMBER_TO_STRING($14), =($18, '_sort'), CAST($15):VARCHAR NOT NULL, =($18, '_routing'), CAST($16):VARCHAR NOT NULL, null:NULL)])
+              LogicalJoin(condition=[true], joinType=[inner])
+                LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __row_id__=[ROW_NUMBER() OVER ()])
+                  LogicalSort(fetch=[5])
+                    CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
+                LogicalValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]])
+  physical: |
+    EnumerableLimit(fetch=[10000])
+      EnumerableAggregate(group=[{0}], row 1=[MAX($1) FILTER $2], row 2=[MAX($3) FILTER $4], row 3=[MAX($5) FILTER $6], row 4=[MAX($7) FILTER $8])
+        EnumerableCalc(expr#0..18=[{inputs}], expr#19=['account_number'], expr#20=[=($t18, $t19)], expr#21=[CAST($t0):VARCHAR NOT NULL], expr#22=['firstname'], expr#23=[=($t18, $t22)], expr#24=[CAST($t1):VARCHAR NOT NULL], expr#25=['address'], expr#26=[=($t18, $t25)], expr#27=[CAST($t2):VARCHAR NOT NULL], expr#28=['balance'], expr#29=[=($t18, $t28)], expr#30=[CAST($t3):VARCHAR NOT NULL], expr#31=['gender'], expr#32=[=($t18, $t31)], expr#33=[CAST($t4):VARCHAR NOT NULL], expr#34=['city'], expr#35=[=($t18, $t34)], expr#36=[CAST($t5):VARCHAR NOT NULL], expr#37=['employer'], expr#38=[=($t18, $t37)], expr#39=[CAST($t6):VARCHAR NOT NULL], expr#40=['state'], expr#41=[=($t18, $t40)], expr#42=[CAST($t7):VARCHAR NOT NULL], expr#43=['age'], expr#44=[=($t18, $t43)], expr#45=[CAST($t8):VARCHAR NOT NULL], expr#46=['email'], expr#47=[=($t18, $t46)], expr#48=[CAST($t9):VARCHAR NOT NULL], expr#49=['lastname'], expr#50=[=($t18, $t49)], expr#51=[CAST($t10):VARCHAR NOT NULL], expr#52=['_id'], expr#53=[=($t18, $t52)], expr#54=[CAST($t11):VARCHAR NOT NULL], expr#55=['_index'], expr#56=[=($t18, $t55)], expr#57=[CAST($t12):VARCHAR NOT NULL], expr#58=['_score'], expr#59=[=($t18, $t58)], expr#60=[NUMBER_TO_STRING($t13)], expr#61=['_maxscore'], expr#62=[=($t18, $t61)], expr#63=[NUMBER_TO_STRING($t14)], expr#64=['_sort'], expr#65=[=($t18, $t64)], expr#66=[CAST($t15):VARCHAR NOT NULL], expr#67=['_routing'], expr#68=[=($t18, $t67)], expr#69=[CAST($t16):VARCHAR NOT NULL], expr#70=[null:NULL], expr#71=[CASE($t20, $t21, $t23, $t24, $t26, $t27, $t29, $t30, $t32, $t33, $t35, $t36, $t38, $t39, $t41, $t42, $t44, $t45, $t47, $t48, $t50, $t51, $t53, $t54, $t56, $t57, $t59, $t60, $t62, $t63, $t65, $t66, $t68, $t69, $t70)], expr#72=[CAST($t71):VARCHAR NOT NULL], expr#73=[1], expr#74=[=($t17, $t73)], expr#75=[2], expr#76=[=($t17, $t75)], expr#77=[3], expr#78=[=($t17, $t77)], expr#79=[4], expr#80=[=($t17, $t79)], column_names=[$t18], value=[$t72], $f6=[$t74], value0=[$t72], $f8=[$t76], value1=[$t72], $f10=[$t78], value2=[$t72], $f12=[$t80])
+          EnumerableNestedLoopJoin(condition=[CASE(SEARCH($18, Sarg['_id':CHAR(14), '_index':CHAR(14), 'account_number', 'address':CHAR(14), 'age':CHAR(14), 'balance':CHAR(14), 'city':CHAR(14), 'email':CHAR(14), 'employer':CHAR(14), 'firstname':CHAR(14), 'gender':CHAR(14), 'lastname':CHAR(14), 'state':CHAR(14)]:CHAR(14)), true, =($18, '_score'), IS NOT NULL(NUMBER_TO_STRING($13)), =($18, '_maxscore'), IS NOT NULL(NUMBER_TO_STRING($14)), SEARCH($18, Sarg['_routing', '_sort':CHAR(8)]:CHAR(8)), true, false)], joinType=[inner])
+            EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])
+              CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":5,"timeout":"1m"}, requestedTotalSize=5, pageSize=null, startFrom=0)])
+            EnumerableValues(tuples=[[{ 'account_number' }, { 'firstname' }, { 'address' }, { 'balance' }, { 'gender' }, { 'city' }, { 'employer' }, { 'state' }, { 'age' }, { 'email' }, { 'lastname' }, { '_id' }, { '_index' }, { '_score' }, { '_maxscore' }, { '_sort' }, { '_routing' }]])
diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
index 71162e81bd8..c16d15c30b8 100644
--- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
@@ -46,6 +46,7 @@ ML: 'ML';
 FILLNULL: 'FILLNULL';
 FLATTEN: 'FLATTEN';
 TRENDLINE: 'TRENDLINE';
+TRANSPOSE: 'TRANSPOSE';
 CHART: 'CHART';
 TIMECHART: 'TIMECHART';
 APPENDCOL: 'APPENDCOL';
@@ -70,6 +71,7 @@ LABEL: 'LABEL';
 SHOW_NUMBERED_TOKEN: 'SHOW_NUMBERED_TOKEN';
 AGGREGATION: 'AGGREGATION';
 APPENDPIPE: 'APPENDPIPE';
+COLUMN_NAME: 'COLUMN_NAME';
 
 //Native JOIN KEYWORDS
 JOIN: 'JOIN';
@@ -157,6 +159,7 @@ INPUT: 'INPUT';
 OUTPUT: 'OUTPUT';
 PATH: 'PATH';
+
 
 // COMPARISON FUNCTION KEYWORDS
 CASE: 'CASE';
 ELSE: 'ELSE';
diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index 7045796a03c..03b0f71e148 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4
@@ -85,6 +85,7 @@ commands
    | regexCommand
    | chartCommand
    | timechartCommand
+   | transposeCommand
    | rexCommand
    | appendPipeCommand
    | replaceCommand
@@ -328,6 +329,16 @@ timechartCommand
    : TIMECHART timechartParameter* statsAggTerm (BY fieldExpression)? timechartParameter*
    ;
 
+transposeCommand
+   : TRANSPOSE transposeParameter*
+   ;
+
+transposeParameter
+   : (number = integerLiteral)
+   | (COLUMN_NAME EQUAL stringLiteral)
+   ;
+
+
 timechartParameter
    : LIMIT EQUAL integerLiteral
    | SPAN EQUAL spanLiteral
@@ -1657,5 +1668,7 @@ searchableKeyWord
    | FIELDNAME
    | ROW
    | COL
+   | TRANSPOSE
+   | COLUMN_NAME
    ;
 
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
index 3f4f3049365..0c93998f768 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
@@ -111,6 +111,7 @@
 import org.opensearch.sql.ast.tree.StreamWindow;
 import org.opensearch.sql.ast.tree.SubqueryAlias;
 import org.opensearch.sql.ast.tree.TableFunction;
+import org.opensearch.sql.ast.tree.Transpose;
 import org.opensearch.sql.ast.tree.Trendline;
 import org.opensearch.sql.ast.tree.UnresolvedPlan;
 import org.opensearch.sql.ast.tree.Window;
@@ -736,6 +737,13 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont
     return new Reverse();
   }
 
+  /** Transpose command. */
+  @Override
+  public UnresolvedPlan visitTransposeCommand(OpenSearchPPLParser.TransposeCommandContext ctx) {
+    java.util.Map<String, Argument> arguments = ArgumentFactory.getArgumentList(ctx);
+    return new Transpose(arguments);
+  }
+
   /** Chart command. */
   @Override
   public UnresolvedPlan visitChartCommand(OpenSearchPPLParser.ChartCommandContext ctx) {
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
index ed76b29b77a..72090e2f069 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/ArgumentFactory.java
@@ -8,7 +8,9 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import org.antlr.v4.runtime.ParserRuleContext;
 import org.opensearch.sql.ast.dsl.AstDSL;
@@ -308,6 +310,30 @@ public static List<Argument> getArgumentList(
     return arguments;
   }
 
+  public static Map<String, Argument> getArgumentList(
+      OpenSearchPPLParser.TransposeCommandContext transposeCommandContext) {
+    Map<String, Argument> arguments = new HashMap<>();
+    for (OpenSearchPPLParser.TransposeParameterContext ctx :
+        transposeCommandContext.transposeParameter()) {
+
+      if (ctx.COLUMN_NAME() != null) {
+        if (ctx.stringLiteral() == null) {
+          throw new IllegalArgumentException("COLUMN_NAME requires a string literal value");
+        }
+        Literal columnName = getArgumentValue(ctx.stringLiteral());
+        arguments.put("columnName", new Argument("columnName", columnName));
+      } else if (ctx.number != null) {
+
+        arguments.put("number", new Argument("number", getArgumentValue(ctx.number)));
+      } else {
+        throw new IllegalArgumentException(
+            String.format(
+                "A transpose parameter must be an integer row limit or column_name, got %s", ctx));
+      }
+    }
+    return arguments;
+  }
+
   /**
    * Get list of {@link Argument}.
    *
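One behavior worth pinning down with a unit test: `Transpose.getMaxRows()` rejects limits above `MAX_LIMIT` (1000). A test sketch along these lines is not part of the diff; it assumes the same `Argument`/`Literal` constructors that `ArgumentFactory` uses above, and would cover the error path:

```java
import static org.junit.Assert.assertThrows;

import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.DataType;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.tree.Transpose;

public class TransposeLimitSketchTest {
  @Test
  public void rejectsMoreThanMaxLimitRows() {
    // Equivalent of "... | transpose 1001" after parsing.
    Map<String, Argument> arguments = new HashMap<>();
    arguments.put("number", new Argument("number", new Literal(1001, DataType.INTEGER)));
    // getMaxRows() validates against MAX_LIMIT (1000) and is expected to throw
    // IllegalArgumentException("Maximum limit to transpose is 1000").
    assertThrows(IllegalArgumentException.class, () -> new Transpose(arguments).getMaxRows());
  }
}
```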
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java
index 7d04ad8e6ad..41656061611 100644
--- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java
@@ -100,6 +100,7 @@
 import org.opensearch.sql.ast.tree.StreamWindow;
 import org.opensearch.sql.ast.tree.SubqueryAlias;
 import org.opensearch.sql.ast.tree.TableFunction;
+import org.opensearch.sql.ast.tree.Transpose;
 import org.opensearch.sql.ast.tree.Trendline;
 import org.opensearch.sql.ast.tree.UnresolvedPlan;
 import org.opensearch.sql.ast.tree.Values;
@@ -636,6 +637,27 @@ public String visitTrendline(Trendline node, String context) {
     return StringUtils.format("%s | trendline %s", child, computations);
   }
 
+  @Override
+  public String visitTranspose(Transpose node, String context) {
+    if (node.getChild().isEmpty()) {
+      return "source=*** | transpose";
+    }
+    String child = node.getChild().get(0).accept(this, context);
+    StringBuilder anonymized = new StringBuilder(StringUtils.format("%s | transpose", child));
+    java.util.Map<String, Argument> arguments = node.getArguments();
+
+    if (arguments.containsKey("number")) {
+      Argument numberArg = arguments.get("number");
+      if (numberArg != null) {
+        anonymized.append(StringUtils.format(" %s", numberArg.getValue()));
+      }
+    }
+    if (arguments.containsKey("columnName")) {
+      anonymized.append(StringUtils.format(" %s=***", "column_name"));
+    }
+    return anonymized.toString();
+  }
+
   @Override
   public String visitAppendCol(AppendCol node, String context) {
     String child = node.getChild().get(0).accept(this, context);
" LogicalValues(tuples=[[{ 'c' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = "column=c; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 4`, MAX(CASE WHEN `__row_id__` = 5 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 5`\n" + + "FROM (SELECT `t0`.`c`, `t0`.`__row_id__`, `t1`.`column`, CASE WHEN `t1`.`column` =" + + " 'c' THEN CAST(`t0`.`c` AS STRING) ELSE NULL END `value`\n" + + "FROM (SELECT COUNT(*) `c`, ROW_NUMBER() OVER () `__row_id__`\n" + + "FROM `scott`.`EMP`) `t0`\n" + + "CROSS JOIN (VALUES ('c')) `t1` (`column`)) `t2`\n" + + "WHERE `t2`.`value` IS NOT NULL\n" + + "GROUP BY `column`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleAggregatesWithAliasesTranspose() { + String ppl = + "source=EMP | stats avg(SAL) as avg_sal, max(SAL) as max_sal, min(SAL) as min_sal, count()" + + " as cnt|transpose "; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)], row" + + " 4=[MAX($4)], row 5=[MAX($5)])\n" + + " LogicalProject(column=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL," + + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)]," + + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)], $f11=[CASE(=($4, 4)," + + " CAST($6):VARCHAR NOT NULL, null:NULL)], $f12=[CASE(=($4, 5), CAST($6):VARCHAR NOT" + + " NULL, null:NULL)])\n" + + " LogicalFilter(condition=[IS NOT NULL($6)])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " __row_id__=[$4], column=[$5], value=[CASE(=($5, 'avg_sal'), NUMBER_TO_STRING($0)," + + " =($5, 'max_sal'), NUMBER_TO_STRING($1), =($5, 'min_sal'), NUMBER_TO_STRING($2)," + + " =($5, 'cnt'), CAST($3):VARCHAR NOT NULL, null:NULL)])\n" + + " LogicalJoin(condition=[true], joinType=[inner])\n" + + " LogicalProject(avg_sal=[$0], max_sal=[$1], min_sal=[$2], cnt=[$3]," + + " __row_id__=[ROW_NUMBER() OVER ()])\n" + + " LogicalAggregate(group=[{}], avg_sal=[AVG($0)], max_sal=[MAX($0)]," + + " min_sal=[MIN($0)], cnt=[COUNT()])\n" + + " LogicalProject(SAL=[$5])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalValues(tuples=[[{ 'avg_sal' }, { 'max_sal' }, { 'min_sal' }, {" + + " 'cnt' }]])\n"; + verifyLogical(root, expectedLogical); + String expectedResult = + "column=avg_sal; row 1=2073.214285; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=max_sal; row 1=5000.00; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=cnt ; row 1=14; row 2=null; row 3=null; row 4=null; row 5=null\n" + + "column=min_sal; row 1=800.00; row 2=null; row 3=null; row 4=null; row 5=null\n"; + verifyResult(root, expectedResult); + + String expectedSparkSql = + "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL" + + " END) `row 3`, MAX(CASE WHEN `__row_id__` = 4 THEN 
+  @Test
+  public void testTransposeWithLimit() {
+    String ppl = "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical =
+        "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n"
+            + "  LogicalProject(column=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL,"
+            + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)],"
+            + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n"
+            + "    LogicalFilter(condition=[IS NOT NULL($6)])\n"
+            + "      LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4],"
+            + " column=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5, 'COMM'),"
+            + " NUMBER_TO_STRING($1), =($5, 'JOB'), CAST($2):VARCHAR NOT NULL, =($5, 'SAL'),"
+            + " NUMBER_TO_STRING($3), null:NULL)])\n"
+            + "        LogicalJoin(condition=[true], joinType=[inner])\n"
+            + "          LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5],"
+            + " __row_id__=[ROW_NUMBER() OVER ()])\n"
+            + "            LogicalTableScan(table=[[scott, EMP]])\n"
+            + "          LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n";
+    verifyLogical(root, expectedLogical);
+    String expectedResult =
+        "column=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n"
+            + "column=COMM ; row 1=null; row 2=300.00; row 3=500.00\n"
+            + "column=JOB  ; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n"
+            + "column=SAL  ; row 1=800.00; row 2=1600.00; row 3=1250.00\n";
+    verifyResult(root, expectedResult);
+
+    String expectedSparkSql =
+        "SELECT `column`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE NULL"
+            + " END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE NULL"
+            + " END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE NULL"
+            + " END) `row 3`\n"
+            + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`, `t`.`__row_id__`,"
+            + " `t0`.`column`, CASE WHEN `t0`.`column` = 'ENAME' THEN CAST(`t`.`ENAME` AS STRING)"
+            + " WHEN `t0`.`column` = 'COMM' THEN NUMBER_TO_STRING(`t`.`COMM`) WHEN `t0`.`column` ="
+            + " 'JOB' THEN CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column` = 'SAL' THEN"
+            + " NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n"
+            + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER () `__row_id__`\n"
+            + "FROM `scott`.`EMP`) `t`\n"
+            + "CROSS JOIN (VALUES ('ENAME'),\n"
+            + "('COMM'),\n"
+            + "('JOB'),\n"
+            + "('SAL')) `t0` (`column`)) `t1`\n"
+            + "WHERE `t1`.`value` IS NOT NULL\n"
+            + "GROUP BY `column`";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
+  @Test
+  public void testTransposeWithLimitColumnName() {
+    String ppl =
+        "source=EMP | fields ENAME, COMM, JOB, SAL | transpose 3 column_name='column_names'";
+    RelNode root = getRelNode(ppl);
+    String expectedLogical =
+        "LogicalAggregate(group=[{0}], row 1=[MAX($1)], row 2=[MAX($2)], row 3=[MAX($3)])\n"
+            + "  LogicalProject(column_names=[$5], $f8=[CASE(=($4, 1), CAST($6):VARCHAR NOT NULL,"
+            + " null:NULL)], $f9=[CASE(=($4, 2), CAST($6):VARCHAR NOT NULL, null:NULL)],"
+            + " $f10=[CASE(=($4, 3), CAST($6):VARCHAR NOT NULL, null:NULL)])\n"
+            + "    LogicalFilter(condition=[IS NOT NULL($6)])\n"
+            + "      LogicalProject(ENAME=[$0], COMM=[$1], JOB=[$2], SAL=[$3], __row_id__=[$4],"
+            + " column_names=[$5], value=[CASE(=($5, 'ENAME'), CAST($0):VARCHAR NOT NULL, =($5,"
+            + " 'COMM'), NUMBER_TO_STRING($1), =($5, 'JOB'), CAST($2):VARCHAR NOT NULL, =($5,"
+            + " 'SAL'), NUMBER_TO_STRING($3), null:NULL)])\n"
+            + "        LogicalJoin(condition=[true], joinType=[inner])\n"
+            + "          LogicalProject(ENAME=[$1], COMM=[$6], JOB=[$2], SAL=[$5],"
+            + " __row_id__=[ROW_NUMBER() OVER ()])\n"
+            + "            LogicalTableScan(table=[[scott, EMP]])\n"
+            + "          LogicalValues(tuples=[[{ 'ENAME' }, { 'COMM' }, { 'JOB' }, { 'SAL' }]])\n";
+    verifyLogical(root, expectedLogical);
+    String expectedResult =
+        "column_names=ENAME; row 1=SMITH; row 2=ALLEN; row 3=WARD\n"
+            + "column_names=COMM ; row 1=null; row 2=300.00; row 3=500.00\n"
+            + "column_names=JOB  ; row 1=CLERK; row 2=SALESMAN; row 3=SALESMAN\n"
+            + "column_names=SAL  ; row 1=800.00; row 2=1600.00; row 3=1250.00\n";
+    verifyResult(root, expectedResult);
+
+    String expectedSparkSql =
+        "SELECT `column_names`, MAX(CASE WHEN `__row_id__` = 1 THEN CAST(`value` AS STRING) ELSE"
+            + " NULL END) `row 1`, MAX(CASE WHEN `__row_id__` = 2 THEN CAST(`value` AS STRING) ELSE"
+            + " NULL END) `row 2`, MAX(CASE WHEN `__row_id__` = 3 THEN CAST(`value` AS STRING) ELSE"
+            + " NULL END) `row 3`\n"
+            + "FROM (SELECT `t`.`ENAME`, `t`.`COMM`, `t`.`JOB`, `t`.`SAL`, `t`.`__row_id__`,"
+            + " `t0`.`column_names`, CASE WHEN `t0`.`column_names` = 'ENAME' THEN CAST(`t`.`ENAME`"
+            + " AS STRING) WHEN `t0`.`column_names` = 'COMM' THEN NUMBER_TO_STRING(`t`.`COMM`) WHEN"
+            + " `t0`.`column_names` = 'JOB' THEN CAST(`t`.`JOB` AS STRING) WHEN `t0`.`column_names`"
+            + " = 'SAL' THEN NUMBER_TO_STRING(`t`.`SAL`) ELSE NULL END `value`\n"
+            + "FROM (SELECT `ENAME`, `COMM`, `JOB`, `SAL`, ROW_NUMBER() OVER () `__row_id__`\n"
+            + "FROM `scott`.`EMP`) `t`\n"
+            + "CROSS JOIN (VALUES ('ENAME'),\n"
+            + "('COMM'),\n"
+            + "('JOB'),\n"
+            + "('SAL')) `t0` (`column_names`)) `t1`\n"
+            + "WHERE `t1`.`value` IS NOT NULL\n"
+            + "GROUP BY `column_names`";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+}
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
index 2fd08988f6b..e80b12caba8 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java
@@ -247,6 +247,13 @@ public void testDedupCommand() {
         anonymize("source=t | dedup f1, f2"));
   }
 
+  @Test
+  public void testTransposeCommand() {
+    assertEquals(
+        "source=table | transpose 5 column_name=***",
+        anonymize("source=t | transpose 5 column_name='column_names'"));
+  }
+
   @Test
   public void testTrendlineCommand() {
     assertEquals(