diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java index 665b3f0a874..ad54d83c8be 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java @@ -5,6 +5,8 @@ package org.opensearch.sql.calcite.big5; +import static org.opensearch.sql.util.MatcherUtils.assertYamlEqualsIgnoreId; + import java.io.IOException; import org.junit.FixMethodOrder; import org.junit.Test; @@ -42,4 +44,32 @@ public void coalesce_nonexistent_field_fallback() throws IOException { String ppl = sanitize(loadExpectedQuery("coalesce_nonexistent_field_fallback.ppl")); timing(summary, "coalesce_nonexistent_field_fallback", ppl); } + + @Test + public void dedup_metrics_size_field() throws IOException { + String ppl = sanitize(loadExpectedQuery("dedup_metrics_size_field.ppl")); + timing(summary, "dedup_metrics_size_field", ppl); + String expected = loadExpectedPlan("dedup_metrics_size_field.yaml"); + assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); + } + + @Test + public void parse_regex_with_cast_transformation() throws IOException { + String ppl = sanitize(loadExpectedQuery("parse_regex_with_cast_transformation.ppl")); + timing(summary, "parse_regex_with_cast_transformation", ppl); + String expected = loadExpectedPlan("parse_regex_with_cast_transformation.yaml"); + assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl)); + } + + @Test + public void script_engine_like_pattern_with_aggregation() throws IOException { + String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_aggregation.ppl")); + timing(summary, "script_engine_like_pattern_with_aggregation", ppl); + } + + @Test + public void script_engine_like_pattern_with_sort() throws IOException { + String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_sort.ppl")); + timing(summary, "script_engine_like_pattern_with_sort", ppl); + } } diff --git a/integ-test/src/test/resources/big5/data/big5.json b/integ-test/src/test/resources/big5/data/big5.json index 30a6a81ab8e..c9a0dc07a14 100644 --- a/integ-test/src/test/resources/big5/data/big5.json +++ b/integ-test/src/test/resources/big5/data/big5.json @@ -1,2 +1,6 @@ {"index":{}} {"message":"2023-04-30T21:48:56.160Z Apr 30 21:48:56 ip-66-221-134-40 journal: donkey glazer fly shark whip servant thornfalcon","process":{"name":"journal"},"aws.cloudwatch":{"ingestion_time":"2023-04-30T21:48:56.160Z","log_group":"/var/log/messages","log_stream":"luckcrafter"},"tags":["preserve_original_event"],"meta":{"file":"2023-01-02/1682891301-gotext.ndjson.gz"},"cloud":{"region":"eu-central-1"},"@timestamp":"2023-01-02T22:02:34.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"tmin":849,"size":1981},"log.file.path":"/var/log/messages/luckcrafter","event":{"id":"sunsetmark","dataset":"generic","ingested":"2023-07-20T03:36:30.223806Z"},"agent":{"id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","name":"fancydancer","ephemeral_id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","type":"filebeat","version":"8.8.0"}} +{"index":{}} +{"message":"2024-04-11T18:00:10.965Z Apr 11 18:00:10 ip-32-11-43-93 sshd: cloak bolt thorn hugger rib jackal wolverine shaker boar fighter taker boulderfox","process":{"name":"sshd"},"aws.cloudwatch":{"log_stream":"mirrorlighter","ingestion_time":"2024-04-11T18:00:10.965Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712851210-sshd.ndjson.gz"},"cloud":{"region":"ap-southeast-3"},"@timestamp":"2023-05-01T21:59:58.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3166,"tmin":1},"log.file.path":"/var/log/messages/mirrorlighter","event":{"id":"patternantler","ingested":"2024-04-11T17:39:10.965818973Z","dataset":"generic"},"agent":{"id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5","name":"brindlehugger","type":"filebeat","version":"8.8.0","ephemeral_id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5"}} +{"index":{}} +{"message":"2024-04-11T10:15:01.628Z Apr 11 10:15:01 ip-95-21-51-112 kernel: kicker stinger slave dolphin sparkox","process":{"name":"kernel"},"aws.cloudwatch":{"log_stream":"plumebard","ingestion_time":"2024-04-11T10:15:01.628Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712826901-kernel.ndjson.gz"},"cloud":{"region":"ap-south-1"},"@timestamp":"2023-03-01T22:31:11.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3993,"tmin":1},"log.file.path":"/var/log/messages/plumebard","event":{"id":"chipgambler","ingested":"2024-04-11T10:09:29.628941177Z","dataset":"generic"},"agent":{"id":"5f25fa16-6a99-489f-b1c5-f27c0627a459","name":"lemongrabber","type":"filebeat","version":"8.8.0","ephemeral_id":"5f25fa16-6a99-489f-b1c5-f27c0627a459"}} diff --git a/integ-test/src/test/resources/big5/queries/dedup_metrics_size_field.ppl b/integ-test/src/test/resources/big5/queries/dedup_metrics_size_field.ppl new file mode 100644 index 00000000000..aca5106807c --- /dev/null +++ b/integ-test/src/test/resources/big5/queries/dedup_metrics_size_field.ppl @@ -0,0 +1,22 @@ +/* +{ + "name": "dedup_metrics_size_field", + "operation-type": "search", + "index": "{{index_name | default('custom-big5')}}", + "body": { + "query": { + "exists": { + "field": "metrics.size", + "boost": 1.0 + } + }, + "_source": { + "includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "metrics.size", "aws", "event"], + "excludes": [] + } + } +} +*/ +source = big5 +| dedup `metrics.size` +| sort - `@timestamp` \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/parse_regex_with_cast_transformation.ppl b/integ-test/src/test/resources/big5/queries/parse_regex_with_cast_transformation.ppl new file mode 100644 index 00000000000..d8ca87e8e5c --- /dev/null +++ b/integ-test/src/test/resources/big5/queries/parse_regex_with_cast_transformation.ppl @@ -0,0 +1,29 @@ +/* +{ + "name": "parse_regex_with_cast_transformation", + "operation-type": "search", + "index": "{{index_name | default('big5')}}", + "body": { + "query": { + "match_all": {} + }, + "_source": { + "includes": ["log.file.path", "@timestamp"], + "excludes": [] + }, + "sort": [ + { + "@timestamp": { + "order": "desc", + "missing": "_last" + } + } + ] + } +} +*/ +source = big5 +| parse `log.file.path` '/var/log/(?\\w+)/(?\\w+)' +| eval filename_len = length(filename) +| fields `log.file.path`, logType, filename, filename_len, `@timestamp` +| sort - `@timestamp` \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_aggregation.ppl b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_aggregation.ppl new file mode 100644 index 00000000000..83003fc5602 --- /dev/null +++ b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_aggregation.ppl @@ -0,0 +1,47 @@ +/* +{ + "name": "script_engine_like_pattern_with_aggregation", + "operation-type": "search", + "index": "{{index_name | default('custom-big5')}}", + "body": { + "query": { + "script": { + "script": { + "source": "{\"langType\":\"calcite\",\"script\":\"...\"}", + "lang": "opensearch_compounded_script", + "params": { + "utcTimestamp": "{{current_timestamp}}" + } + }, + "boost": 1.0 + } + }, + "_source": { + "includes": ["message", "metrics.size"], + "excludes": [] + }, + "aggregations": { + "composite_buckets": { + "composite": { + "size": 10000, + "sources": [ + { + "metrics.size": { + "terms": { + "field": "metrics.size", + "missing_bucket": true, + "missing_order": "first", + "order": "asc" + } + } + } + ] + } + } + } + } +} +*/ +source = big5 +| where like(`message`, '%sshd%') +| stats count() by `metrics.size` \ No newline at end of file diff --git a/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_sort.ppl b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_sort.ppl new file mode 100644 index 00000000000..81abe8f2b41 --- /dev/null +++ b/integ-test/src/test/resources/big5/queries/script_engine_like_pattern_with_sort.ppl @@ -0,0 +1,37 @@ +/* +{ + "name": "script_engine_like_pattern_with_sort", + "operation-type": "search", + "index": "{{index_name | default('big5')}}", + "body": { + "query": { + "script": { + "script": { + "source": "{\"langType\":\"calcite\",\"script\":\"...\"}", + "lang": "opensearch_compounded_script", + "params": { + "utcTimestamp": "{{current_timestamp}}" + } + }, + "boost": 1.0 + } + }, + "_source": { + "includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "aws", "event"], + "excludes": [] + }, + "sort": [ + { + "@timestamp": { + "order": "desc", + "missing": "_last" + } + } + ] + } +} +*/ +source = big5 +| where like(`message`, '%sshd%') +| sort - `@timestamp` +| head 10 \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/dedup_metrics_size_field.yaml b/integ-test/src/test/resources/expectedOutput/calcite/dedup_metrics_size_field.yaml new file mode 100644 index 00000000000..3c948626ef1 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/dedup_metrics_size_field.yaml @@ -0,0 +1,16 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$7], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], ecs=[$18], data_stream=[$20], meta=[$24], host=[$26], metrics=[$27], aws=[$30], event=[$35]) + LogicalSort(sort0=[$17], dir0=[DESC-nulls-last]) + LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], ecs=[$18], ecs.version=[$19], data_stream=[$20], data_stream.dataset=[$21], data_stream.namespace=[$22], data_stream.type=[$23], meta=[$24], meta.file=[$25], host=[$26], metrics=[$27], metrics.size=[$28], metrics.tmin=[$29], aws=[$30], aws.cloudwatch=[$31], aws.cloudwatch.ingestion_time=[$32], aws.cloudwatch.log_group=[$33], aws.cloudwatch.log_stream=[$34], event=[$35], event.dataset=[$36], event.id=[$37], event.ingested=[$38], _id=[$39], _index=[$40], _score=[$41], _maxscore=[$42], _sort=[$43], _routing=[$44]) + LogicalFilter(condition=[<=($45, 1)]) + LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], ecs=[$18], ecs.version=[$19], data_stream=[$20], data_stream.dataset=[$21], data_stream.namespace=[$22], data_stream.type=[$23], meta=[$24], meta.file=[$25], host=[$26], metrics=[$27], metrics.size=[$28], metrics.tmin=[$29], aws=[$30], aws.cloudwatch=[$31], aws.cloudwatch.ingestion_time=[$32], aws.cloudwatch.log_group=[$33], aws.cloudwatch.log_stream=[$34], event=[$35], event.dataset=[$36], event.id=[$37], event.ingested=[$38], _id=[$39], _index=[$40], _score=[$41], _maxscore=[$42], _sort=[$43], _routing=[$44], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $28 ORDER BY $28)]) + LogicalFilter(condition=[IS NOT NULL($28)]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableSort(sort0=[$7], dir0=[DESC-nulls-last]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[<=($t16, $t17)], proj#0..12=[{exprs}], aws=[$t14], event=[$t15], $condition=[$t18]) + EnumerableWindow(window#0=[window(partition {13} order by [13] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->IS NOT NULL($13)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"metrics.size","boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","metrics.size","aws","event"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/parse_regex_with_cast_transformation.yaml b/integ-test/src/test/resources/expectedOutput/calcite/parse_regex_with_cast_transformation.yaml new file mode 100644 index 00000000000..dd3e8bc82f6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/parse_regex_with_cast_transformation.yaml @@ -0,0 +1,14 @@ +calcite: + logical: | + LogicalSystemLimit(sort0=[$4], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalSort(sort0=[$4], dir0=[DESC-nulls-last]) + LogicalProject(log.file.path=[$10], logType=[ITEM(PARSE($10, '/var/log/(?\w+)/(?\w+)':VARCHAR, 'regex':VARCHAR), 'logType':VARCHAR)], filename=[ITEM(PARSE($10, '/var/log/(?\w+)/(?\w+)':VARCHAR, 'regex':VARCHAR), 'filename':VARCHAR)], filename_len=[CHAR_LENGTH(ITEM(PARSE($10, '/var/log/(?\w+)/(?\w+)':VARCHAR, 'regex':VARCHAR), 'filename':VARCHAR))], @timestamp=[$17]) + CalciteLogicalIndexScan(table=[[OpenSearch, big5]]) + physical: | + EnumerableCalc(expr#0..1=[{inputs}], expr#2=['/var/log/(?\w+)/(?\w+)':VARCHAR], expr#3=['regex':VARCHAR], expr#4=[PARSE($t0, $t2, $t3)], expr#5=['logType':VARCHAR], expr#6=[ITEM($t4, $t5)], expr#7=['filename':VARCHAR], expr#8=[ITEM($t4, $t7)], expr#9=[CHAR_LENGTH($t8)], log.file.path=[$t0], $f1=[$t6], $f2=[$t8], $f3=[$t9], @timestamp=[$t1]) + CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[log.file.path, @timestamp], SORT->[{ + "@timestamp" : { + "order" : "desc", + "missing" : "_last" + } + }], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["log.file.path","@timestamp"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])