Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

package org.opensearch.sql.calcite.big5;

import static org.opensearch.sql.util.MatcherUtils.assertYamlEqualsIgnoreId;

import java.io.IOException;
import org.junit.FixMethodOrder;
import org.junit.Test;
Expand Down Expand Up @@ -42,4 +44,32 @@ public void coalesce_nonexistent_field_fallback() throws IOException {
String ppl = sanitize(loadExpectedQuery("coalesce_nonexistent_field_fallback.ppl"));
timing(summary, "coalesce_nonexistent_field_fallback", ppl);
}

@Test
public void dedup_metrics_size_field() throws IOException {
String ppl = sanitize(loadExpectedQuery("dedup_metrics_size_field.ppl"));
timing(summary, "dedup_metrics_size_field", ppl);
String expected = loadExpectedPlan("dedup_metrics_size_field.yaml");
assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
}

@Test
public void rex_regex_transformation() throws IOException {
String ppl = sanitize(loadExpectedQuery("rex_regex_transformation.ppl"));
timing(summary, "rex_regex_transformation", ppl);
String expected = loadExpectedPlan("rex_regex_transformation.yaml");
assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
}

@Test
public void script_engine_like_pattern_with_aggregation() throws IOException {
String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_aggregation.ppl"));
timing(summary, "script_engine_like_pattern_with_aggregation", ppl);
}

@Test
public void script_engine_like_pattern_with_sort() throws IOException {
String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_sort.ppl"));
timing(summary, "script_engine_like_pattern_with_sort", ppl);
}
}
4 changes: 4 additions & 0 deletions integ-test/src/test/resources/big5/data/big5.json
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
{"index":{}}
{"message":"2023-04-30T21:48:56.160Z Apr 30 21:48:56 ip-66-221-134-40 journal: donkey glazer fly shark whip servant thornfalcon","process":{"name":"journal"},"aws.cloudwatch":{"ingestion_time":"2023-04-30T21:48:56.160Z","log_group":"/var/log/messages","log_stream":"luckcrafter"},"tags":["preserve_original_event"],"meta":{"file":"2023-01-02/1682891301-gotext.ndjson.gz"},"cloud":{"region":"eu-central-1"},"@timestamp":"2023-01-02T22:02:34.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"tmin":849,"size":1981},"log.file.path":"/var/log/messages/luckcrafter","event":{"id":"sunsetmark","dataset":"generic","ingested":"2023-07-20T03:36:30.223806Z"},"agent":{"id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","name":"fancydancer","ephemeral_id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","type":"filebeat","version":"8.8.0"}}
{"index":{}}
{"message":"2024-04-11T18:00:10.965Z Apr 11 18:00:10 ip-32-11-43-93 sshd: cloak bolt thorn hugger rib jackal wolverine shaker boar fighter taker boulderfox","process":{"name":"sshd"},"aws.cloudwatch":{"log_stream":"mirrorlighter","ingestion_time":"2024-04-11T18:00:10.965Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712851210-sshd.ndjson.gz"},"cloud":{"region":"ap-southeast-3"},"@timestamp":"2023-05-01T21:59:58.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3166,"tmin":1},"log.file.path":"/var/log/messages/mirrorlighter","event":{"id":"patternantler","ingested":"2024-04-11T17:39:10.965818973Z","dataset":"generic"},"agent":{"id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5","name":"brindlehugger","type":"filebeat","version":"8.8.0","ephemeral_id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5"}}
{"index":{}}
{"message":"2024-04-11T10:15:01.628Z Apr 11 10:15:01 ip-95-21-51-112 kernel: kicker stinger slave dolphin sparkox","process":{"name":"kernel"},"aws.cloudwatch":{"log_stream":"plumebard","ingestion_time":"2024-04-11T10:15:01.628Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712826901-kernel.ndjson.gz"},"cloud":{"region":"ap-south-1"},"@timestamp":"2023-03-01T22:31:11.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3993,"tmin":1},"log.file.path":"/var/log/messages/plumebard","event":{"id":"chipgambler","ingested":"2024-04-11T10:09:29.628941177Z","dataset":"generic"},"agent":{"id":"5f25fa16-6a99-489f-b1c5-f27c0627a459","name":"lemongrabber","type":"filebeat","version":"8.8.0","ephemeral_id":"5f25fa16-6a99-489f-b1c5-f27c0627a459"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
{
"name": "dedup_metrics_size_field",
"operation-type": "search",
"index": "{{index_name | default('custom-big5')}}",
"body": {
"query": {
"exists": {
"field": "metrics.size",
"boost": 1.0
}
},
"_source": {
"includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "metrics.size", "aws", "event"],
"excludes": []
}
}
}
*/
source = big5
| dedup metrics.size
| sort - @timestamp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
{
"name": "rex_regex_transformation",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"query": {
"match_all": {}
},
"_source": {
"includes": ["log.file.path", "@timestamp"],
"excludes": []
},
"sort": [
{
"@timestamp": {
"order": "desc",
"missing": "_last"
}
}
]
}
}
*/
source = big5
| rex field=log.file.path '/var/log/(?<logType>\\w+)/(?<filename>\\w+)'
| eval filename_len = length(filename)
| fields log.file.path, logType, filename, filename_len, @timestamp
| sort - @timestamp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
{
"name": "script_engine_like_pattern_with_aggregation",
"operation-type": "search",
"index": "{{index_name | default('custom-big5')}}",
"body": {
"query": {
"script": {
"script": {
"source": "{\"langType\":\"calcite\",\"script\":\"...\"}",
"lang": "opensearch_compounded_script",
"params": {
"utcTimestamp": "{{current_timestamp}}"
}
},
"boost": 1.0
}
},
"_source": {
"includes": ["message", "metrics.size"],
"excludes": []
},
"aggregations": {
"composite_buckets": {
"composite": {
"size": 10000,
"sources": [
{
"metrics.size": {
"terms": {
"field": "metrics.size",
"missing_bucket": true,
"missing_order": "first",
"order": "asc"
}
}
}
]
}
}
}
}
}
*/
source = big5
| where like(`message`, '%sshd%')
| stats count() by metrics.size
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
{
"name": "script_engine_like_pattern_with_sort",
"operation-type": "search",
"index": "{{index_name | default('big5')}}",
"body": {
"query": {
"script": {
"script": {
"source": "{\"langType\":\"calcite\",\"script\":\"...\"}",
"lang": "opensearch_compounded_script",
"params": {
"utcTimestamp": "{{current_timestamp}}"
}
},
"boost": 1.0
}
},
"size": 10,
"_source": {
"includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "aws", "event"],
"excludes": []
},
"sort": [
{
"@timestamp": {
"order": "desc",
"missing": "_last"
}
}
]
}
}
*/
source = big5
| where like(`message`, '%sshd%')
| sort - @timestamp
| head 10
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
calcite:
logical: |
LogicalSystemLimit(sort0=[$7], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalProject(agent=[$0], process=[$6], log=[$8], message=[$11], tags=[$12], cloud=[$13], input=[$15], @timestamp=[$17], ecs=[$18], data_stream=[$20], meta=[$24], host=[$26], metrics=[$27], aws=[$30], event=[$35])
LogicalSort(sort0=[$17], dir0=[DESC-nulls-last])
LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], ecs=[$18], ecs.version=[$19], data_stream=[$20], data_stream.dataset=[$21], data_stream.namespace=[$22], data_stream.type=[$23], meta=[$24], meta.file=[$25], host=[$26], metrics=[$27], metrics.size=[$28], metrics.tmin=[$29], aws=[$30], aws.cloudwatch=[$31], aws.cloudwatch.ingestion_time=[$32], aws.cloudwatch.log_group=[$33], aws.cloudwatch.log_stream=[$34], event=[$35], event.dataset=[$36], event.id=[$37], event.ingested=[$38], _id=[$39], _index=[$40], _score=[$41], _maxscore=[$42], _sort=[$43], _routing=[$44])
LogicalFilter(condition=[<=($45, 1)])
LogicalProject(agent=[$0], agent.ephemeral_id=[$1], agent.id=[$2], agent.name=[$3], agent.type=[$4], agent.version=[$5], process=[$6], process.name=[$7], log=[$8], log.file=[$9], log.file.path=[$10], message=[$11], tags=[$12], cloud=[$13], cloud.region=[$14], input=[$15], input.type=[$16], @timestamp=[$17], ecs=[$18], ecs.version=[$19], data_stream=[$20], data_stream.dataset=[$21], data_stream.namespace=[$22], data_stream.type=[$23], meta=[$24], meta.file=[$25], host=[$26], metrics=[$27], metrics.size=[$28], metrics.tmin=[$29], aws=[$30], aws.cloudwatch=[$31], aws.cloudwatch.ingestion_time=[$32], aws.cloudwatch.log_group=[$33], aws.cloudwatch.log_stream=[$34], event=[$35], event.dataset=[$36], event.id=[$37], event.ingested=[$38], _id=[$39], _index=[$40], _score=[$41], _maxscore=[$42], _sort=[$43], _routing=[$44], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $28 ORDER BY $28)])
LogicalFilter(condition=[IS NOT NULL($28)])
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
physical: |
EnumerableLimit(fetch=[10000])
EnumerableSort(sort0=[$7], dir0=[DESC-nulls-last])
EnumerableCalc(expr#0..16=[{inputs}], expr#17=[1], expr#18=[<=($t16, $t17)], proj#0..12=[{exprs}], aws=[$t14], event=[$t15], $condition=[$t18])
EnumerableWindow(window#0=[window(partition {13} order by [13] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[agent, process, log, message, tags, cloud, input, @timestamp, ecs, data_stream, meta, host, metrics, metrics.size, aws, event], FILTER->IS NOT NULL($13)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","query":{"exists":{"field":"metrics.size","boost":1.0}},"_source":{"includes":["agent","process","log","message","tags","cloud","input","@timestamp","ecs","data_stream","meta","host","metrics","metrics.size","aws","event"],"excludes":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
calcite:
logical: |
LogicalSystemLimit(sort0=[$4], dir0=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
LogicalSort(sort0=[$4], dir0=[DESC-nulls-last])
LogicalProject(log.file.path=[$10], logType=[ITEM(PARSE($10, '/var/log/(?<logType>\w+)/(?<filename>\w+)':VARCHAR, 'regex':VARCHAR), 'logType':VARCHAR)], filename=[ITEM(PARSE($10, '/var/log/(?<logType>\w+)/(?<filename>\w+)':VARCHAR, 'regex':VARCHAR), 'filename':VARCHAR)], filename_len=[CHAR_LENGTH(ITEM(PARSE($10, '/var/log/(?<logType>\w+)/(?<filename>\w+)':VARCHAR, 'regex':VARCHAR), 'filename':VARCHAR))], @timestamp=[$17])
CalciteLogicalIndexScan(table=[[OpenSearch, big5]])
physical: |
EnumerableCalc(expr#0..1=[{inputs}], expr#2=['/var/log/(?<logType>\w+)/(?<filename>\w+)':VARCHAR], expr#3=['regex':VARCHAR], expr#4=[PARSE($t0, $t2, $t3)], expr#5=['logType':VARCHAR], expr#6=[ITEM($t4, $t5)], expr#7=['filename':VARCHAR], expr#8=[ITEM($t4, $t7)], expr#9=[CHAR_LENGTH($t8)], log.file.path=[$t0], $f1=[$t6], $f2=[$t8], $f3=[$t9], @timestamp=[$t1])
CalciteEnumerableIndexScan(table=[[OpenSearch, big5]], PushDownContext=[[PROJECT->[log.file.path, @timestamp], SORT->[{
"@timestamp" : {
"order" : "desc",
"missing" : "_last"
}
}], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["log.file.path","@timestamp"],"excludes":[]},"sort":[{"@timestamp":{"order":"desc","missing":"_last"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Loading