-
Notifications
You must be signed in to change notification settings - Fork 199
Add compare_ip operator udfs #3821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
5d8c62c
3008d75
851f40e
e4f619e
73bec7b
2f9c5b4
40d80a4
a4916b2
f755494
0acc8f7
e587271
fb8e969
999fdf7
5151350
ea1414c
e2a124f
62a6084
eedd8c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -614,6 +614,14 @@ public PPLTypeChecker getTypeChecker() { | |
| } | ||
|
|
||
| void populate() { | ||
| // register operators for IP comparing | ||
| registerOperator(NOTEQUAL, PPLBuiltinOperators.NOT_EQUALS_IP); | ||
| registerOperator(EQUAL, PPLBuiltinOperators.EQUALS_IP); | ||
| registerOperator(GREATER, PPLBuiltinOperators.GREATER_IP); | ||
| registerOperator(GTE, PPLBuiltinOperators.GTE_IP); | ||
| registerOperator(LESS, PPLBuiltinOperators.LESS_IP); | ||
| registerOperator(LTE, PPLBuiltinOperators.LTE_IP); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SqlStdOperatorTable.EQUALS` is a very basic std operator which widely used in calcite internal, I am worry that it may introduce potential bugs and performance regression for pushdown.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But Calcite's built-in operators like There was two solutions:
We opted the latter.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In order to keep the comparison logic the same as v2, I chose to add these specific operator udfs for IP comparison instead of converting it to string type that calcite can compare.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Can we move above logic to a specific method
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or modify to and leverage
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Modified, improving codes readability. |
||
|
|
||
| // Register std operator | ||
| registerOperator(AND, SqlStdOperatorTable.AND); | ||
| registerOperator(OR, SqlStdOperatorTable.OR); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -400,6 +400,10 @@ private static List<ExprType> getExprTypes(SqlTypeFamily family) { | |
| OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); | ||
| case ANY, IGNORE -> List.of( | ||
| OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.ANY)); | ||
| // We borrow SqlTypeFamily.NULL to represent EXPR_IP. This is a workaround | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we stop use
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @qianheng-aws How about this implementation ishaoxy#1
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It just came to me that there's not only compare that checks IP type. CIDR_MATCH also has to validate IP types. If we make a specific IP type checker for compare operators, we may also have to create one for each other functions like cidrmatch and geoip. |
||
| // since there is no corresponding IP type family in Calcite. | ||
| case NULL -> List.of( | ||
| OpenSearchTypeFactory.TYPE_FACTORY.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_IP)); | ||
| default -> { | ||
| RelDataType type = family.getDefaultConcreteType(OpenSearchTypeFactory.TYPE_FACTORY); | ||
| if (type == null) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,11 @@ | |
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.CompositeOperandTypeChecker; | ||
| import org.apache.calcite.sql.type.OperandTypes; | ||
| import org.apache.calcite.sql.type.ReturnTypes; | ||
| import org.apache.calcite.sql.type.SqlReturnTypeInference; | ||
| import org.apache.calcite.sql.type.SqlTypeFamily; | ||
| import org.opensearch.sql.data.model.ExprIpValue; | ||
| import org.opensearch.sql.data.model.ExprValue; | ||
| import org.opensearch.sql.data.model.ExprValueUtils; | ||
|
|
@@ -44,9 +46,12 @@ public SqlReturnTypeInference getReturnTypeInference() { | |
|
|
||
| @Override | ||
| public UDFOperandMetadata getOperandMetadata() { | ||
| // EXPR_IP is mapped to SqlTypeFamily.VARCHAR in | ||
| // EXPR_IP is mapped to SqlTypeFamily.NULL in | ||
| // UserDefinedFunctionUtils.convertRelDataTypeToSqlTypeName | ||
| return UDFOperandMetadata.wrap(OperandTypes.STRING_STRING); | ||
| return UDFOperandMetadata.wrap( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should use new
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handled. |
||
| (CompositeOperandTypeChecker) | ||
| OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))); | ||
| } | ||
|
|
||
| public static class CidrMatchImplementor implements NotNullImplementor { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.expression.function.udf.ip; | ||
|
|
||
| import inet.ipaddr.IPAddress; | ||
| import java.util.List; | ||
| import org.apache.calcite.adapter.enumerable.NotNullImplementor; | ||
| import org.apache.calcite.adapter.enumerable.NullPolicy; | ||
| import org.apache.calcite.adapter.enumerable.RexToLixTranslator; | ||
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.CompositeOperandTypeChecker; | ||
| import org.apache.calcite.sql.type.OperandTypes; | ||
| import org.apache.calcite.sql.type.ReturnTypes; | ||
| import org.apache.calcite.sql.type.SqlReturnTypeInference; | ||
| import org.apache.calcite.sql.type.SqlTypeFamily; | ||
| import org.opensearch.sql.data.model.ExprIpValue; | ||
| import org.opensearch.sql.expression.function.ImplementorUDF; | ||
| import org.opensearch.sql.expression.function.UDFOperandMetadata; | ||
| import org.opensearch.sql.utils.IPUtils; | ||
|
|
||
| /** | ||
| * {@code compare(ip1, ip2)} compares two IP addresses using a provided op. | ||
| * | ||
| * <p>Signature: | ||
| * | ||
| * <ul> | ||
| * <li>(IP, STRING) -> BOOLEAN | ||
| * <li>(STRING, IP) -> BOOLEAN | ||
| * <li>(IP, IP) -> BOOLEAN | ||
| * </ul> | ||
| */ | ||
| public class CompareIpFunction extends ImplementorUDF { | ||
|
|
||
| private CompareIpFunction(ComparisonType comparisonType) { | ||
| super(new CompareImplementor(comparisonType), NullPolicy.ANY); | ||
| } | ||
|
|
||
| public static CompareIpFunction less() { | ||
| return new CompareIpFunction(ComparisonType.LESS); | ||
| } | ||
|
|
||
| public static CompareIpFunction greater() { | ||
| return new CompareIpFunction(ComparisonType.GREATER); | ||
| } | ||
|
|
||
| public static CompareIpFunction lessOrEquals() { | ||
| return new CompareIpFunction(ComparisonType.LESS_OR_EQUAL); | ||
| } | ||
|
|
||
| public static CompareIpFunction greaterOrEquals() { | ||
| return new CompareIpFunction(ComparisonType.GREATER_OR_EQUAL); | ||
| } | ||
|
|
||
| public static CompareIpFunction equals() { | ||
| return new CompareIpFunction(ComparisonType.EQUALS); | ||
| } | ||
|
|
||
| public static CompareIpFunction notEquals() { | ||
| return new CompareIpFunction(ComparisonType.NOT_EQUALS); | ||
| } | ||
|
|
||
| @Override | ||
| public SqlReturnTypeInference getReturnTypeInference() { | ||
| return ReturnTypes.BOOLEAN_FORCE_NULLABLE; | ||
| } | ||
|
|
||
| @Override | ||
| public UDFOperandMetadata getOperandMetadata() { | ||
| return UDFOperandMetadata.wrap( | ||
| (CompositeOperandTypeChecker) | ||
| OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING)) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL))); | ||
| } | ||
|
|
||
| public static class CompareImplementor implements NotNullImplementor { | ||
| private final ComparisonType comparisonType; | ||
|
|
||
| public CompareImplementor(ComparisonType comparisonType) { | ||
| this.comparisonType = comparisonType; | ||
| } | ||
|
|
||
| @Override | ||
| public Expression implement( | ||
| RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { | ||
| return Expressions.call( | ||
| CompareImplementor.class, | ||
| "compare", | ||
| translatedOperands.get(0), | ||
| translatedOperands.get(1), | ||
| Expressions.constant(comparisonType)); | ||
| } | ||
|
|
||
| public static boolean compare(Object obj1, Object obj2, ComparisonType comparisonType) { | ||
| try { | ||
| String ip1 = extractIpString(obj1); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not leverage
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for only focusing on IPUtils before and ignoring the functions encapsulated in ExprIpValue. Now fixed. |
||
| String ip2 = extractIpString(obj2); | ||
| IPAddress addr1 = IPUtils.toAddress(ip1); | ||
| IPAddress addr2 = IPUtils.toAddress(ip2); | ||
| int result = IPUtils.compare(addr1, addr2); | ||
| return switch (comparisonType) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For efficiency consideration, we'd better move this switch case to the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| case EQUALS -> result == 0; | ||
| case NOT_EQUALS -> result != 0; | ||
| case LESS -> result < 0; | ||
| case LESS_OR_EQUAL -> result <= 0; | ||
| case GREATER -> result > 0; | ||
| case GREATER_OR_EQUAL -> result >= 0; | ||
| }; | ||
| } catch (Exception e) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| private static String extractIpString(Object obj) { | ||
| if (obj instanceof String) return (String) obj; | ||
| if (obj instanceof ExprIpValue) return ((ExprIpValue) obj).value(); | ||
| throw new IllegalArgumentException("Invalid IP type: " + obj); | ||
| } | ||
| } | ||
|
|
||
| public enum ComparisonType { | ||
| EQUALS, | ||
| NOT_EQUALS, | ||
| LESS, | ||
| LESS_OR_EQUAL, | ||
| GREATER, | ||
| GREATER_OR_EQUAL | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Questions, Do we need to support IP as UDT in PPL engine?
In OpenSearch PPL, IP handling depends on index field type. CIDR-based filtering should use ip_range queries for ip fields, script pushdown / in-memory processing for keyword, text, and runtime string fields.