Skip to content

Commit 67824df

Browse files
committed
peng - fix the extraction behavior without filter even when there is zero match
Signed-off-by: Jialiang Liang <[email protected]>
1 parent 62d0ae4 commit 67824df

3 files changed

Lines changed: 6 additions & 14 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,6 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
214214
"Rex pattern must contain at least one named capture group");
215215
}
216216

217-
String filterPattern = patternStr.replaceAll("\\(\\?<[a-zA-Z][a-zA-Z0-9]*>", "(");
218-
RexNode regexMatchCondition =
219-
context.rexBuilder.makeCall(
220-
SqlLibraryOperators.REGEXP_CONTAINS,
221-
fieldRex,
222-
context.rexBuilder.makeLiteral(filterPattern));
223-
context.relBuilder.filter(regexMatchCondition);
224-
225217
List<RexNode> newFields = new ArrayList<>();
226218
List<String> newFieldNames = new ArrayList<>();
227219

docs/user/ppl/cmd/rex.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,10 @@ PPL query::
5757
+--------------------------+--------------+---------+
5858

5959

60-
Example 2: Extract with Filtering
61-
==================================
60+
Example 2: Handling Non-matching Patterns
61+
==========================================
6262

63-
The rex command automatically filters out events that don't match the pattern.
63+
The rex command returns all events, setting extracted fields to null for non-matching patterns.
6464

6565
PPL query::
6666

@@ -173,7 +173,7 @@ Feature rex parse
173173
============================= ============ ============
174174
Pattern Type Java Regex Java Regex
175175
Named Groups Required Yes Yes
176-
Filtering by Match Yes Yes
176+
Filtering by Match No Yes
177177
Multiple Matches Yes No
178178
Text Substitution Yes No
179179
Offset Tracking Yes No
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"calcite": {
3-
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], initial=[$17])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], initial=[REX_EXTRACT($10, '(?<initial>^[A-Z])', 1)])\n LogicalFilter(condition=[REGEXP_CONTAINS($10, '(^[A-Z])')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4-
"physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['(?<initial>^[A-Z])'], expr#12=[1], expr#13=[REX_EXTRACT($t10, $t11, $t12)], proj#0..10=[{exprs}], initial=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[SCRIPT->REGEXP_CONTAINS($10, '(^[A-Z])'), LIMIT->5, PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBO3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMTAsCiAgICAgICJuYW1lIjogIiQxMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiheW0EtWl0pIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDgKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAQeHB+cQB+AAp0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AC3QABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABt4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB0AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABF+cQB+AAp0AAZTVFJJTkd+cQB+ABd0AAdLZXl3b3JkcQB+ABx4dAAHYWRkcmVzc3NxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgAMdAAGZ2VuZGVyc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAAEY2l0eXNxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAJxAH4AIXEAfgAieHQACGVtcGxveWVyc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAAFc3RhdGVzcQB+AA9xAH4AFXEAfgAYcQB+ABxzcQB+AAAAAAADdwQAAAACcQB+ACFxAH4AInh0AANhZ2VxAH4ADHQABWVtYWlsc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAAIbGFzdG5hbWVzcQB+AA9xAH4AFXEAfgAYcQB+ABxzcQB+AAAAAAADdwQAAAACcQB+ACFxAH4AInh4eA==\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n"
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], initial=[$17])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], initial=[REX_EXTRACT($10, '(?<initial>^[A-Z])', 1)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['(?<initial>^[A-Z])'], expr#12=[1], expr#13=[REX_EXTRACT($t10, $t11, $t12)], proj#0..10=[{exprs}], $f11=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[LIMIT->5, LIMIT->10000, PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]}}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n"
55
}
66
}

0 commit comments

Comments
 (0)