-
Notifications
You must be signed in to change notification settings - Fork 200
Add compare_ip operator udfs #3821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
5d8c62c
3008d75
851f40e
e4f619e
73bec7b
2f9c5b4
40d80a4
a4916b2
f755494
0acc8f7
e587271
fb8e969
999fdf7
5151350
ea1414c
e2a124f
62a6084
eedd8c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -614,6 +614,14 @@ public PPLTypeChecker getTypeChecker() { | |
| } | ||
|
|
||
| void populate() { | ||
| // register operators for IP comparing | ||
| registerOperator(NOTEQUAL, PPLBuiltinOperators.NOT_EQUALS_IP); | ||
| registerOperator(EQUAL, PPLBuiltinOperators.EQUALS_IP); | ||
| registerOperator(GREATER, PPLBuiltinOperators.GREATER_IP); | ||
| registerOperator(GTE, PPLBuiltinOperators.GTE_IP); | ||
| registerOperator(LESS, PPLBuiltinOperators.LESS_IP); | ||
| registerOperator(LTE, PPLBuiltinOperators.LTE_IP); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SqlStdOperatorTable.EQUALS` is a very basic std operator which widely used in calcite internal, I am worry that it may introduce potential bugs and performance regression for pushdown.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But Calcite's built-in operators like There was two solutions:
We opted the latter.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In order to keep the comparison logic the same as v2, I chose to add these specific operator udfs for IP comparison instead of converting it to string type that calcite can compare.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Can we move above logic to a specific method
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or modify to and leverage
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Modified, improving codes readability. |
||
|
|
||
| // Register std operator | ||
| registerOperator(AND, SqlStdOperatorTable.AND); | ||
| registerOperator(OR, SqlStdOperatorTable.OR); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -400,6 +400,10 @@ private static List<ExprType> getExprTypes(SqlTypeFamily family) { | |
| OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); | ||
| case ANY, IGNORE -> List.of( | ||
| OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.ANY)); | ||
| // We borrow SqlTypeFamily.NULL to represent EXPR_IP. This is a workaround | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we stop use
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @qianheng-aws How about this implementation ishaoxy#1
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It just came to me that there's not only compare that checks IP type. CIDR_MATCH also has to validate IP types. If we make a specific IP type checker for compare operators, we may also have to create one for each other functions like cidrmatch and geoip. |
||
| // since there is no corresponding IP type family in Calcite. | ||
| case NULL -> List.of( | ||
| OpenSearchTypeFactory.TYPE_FACTORY.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_IP)); | ||
| default -> { | ||
| RelDataType type = family.getDefaultConcreteType(OpenSearchTypeFactory.TYPE_FACTORY); | ||
| if (type == null) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,7 @@ | |
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.OperandTypes; | ||
| import org.apache.calcite.sql.type.ReturnTypes; | ||
| import org.apache.calcite.sql.type.SqlReturnTypeInference; | ||
| import org.apache.calcite.sql.type.*; | ||
|
ishaoxy marked this conversation as resolved.
Outdated
|
||
| import org.opensearch.sql.data.model.ExprIpValue; | ||
| import org.opensearch.sql.data.model.ExprValue; | ||
| import org.opensearch.sql.data.model.ExprValueUtils; | ||
|
|
@@ -44,9 +42,12 @@ public SqlReturnTypeInference getReturnTypeInference() { | |
|
|
||
| @Override | ||
| public UDFOperandMetadata getOperandMetadata() { | ||
| // EXPR_IP is mapped to SqlTypeFamily.VARCHAR in | ||
| // EXPR_IP is mapped to SqlTypeFamily.NULL in | ||
| // UserDefinedFunctionUtils.convertRelDataTypeToSqlTypeName | ||
| return UDFOperandMetadata.wrap(OperandTypes.STRING_STRING); | ||
| return UDFOperandMetadata.wrap( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should use new
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handled. |
||
| (CompositeOperandTypeChecker) | ||
| OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))); | ||
| } | ||
|
|
||
| public static class CidrMatchImplementor implements NotNullImplementor { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.expression.function.udf.ip; | ||
|
|
||
| import inet.ipaddr.IPAddress; | ||
| import java.util.List; | ||
| import org.apache.calcite.adapter.enumerable.NotNullImplementor; | ||
| import org.apache.calcite.adapter.enumerable.NullPolicy; | ||
| import org.apache.calcite.adapter.enumerable.RexToLixTranslator; | ||
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.*; | ||
| import org.opensearch.sql.data.model.ExprIpValue; | ||
| import org.opensearch.sql.exception.SemanticCheckException; | ||
| import org.opensearch.sql.expression.function.ImplementorUDF; | ||
| import org.opensearch.sql.expression.function.UDFOperandMetadata; | ||
| import org.opensearch.sql.utils.IPUtils; | ||
|
|
||
| /** | ||
| * {@code Equals(ip1, ip2)} checks if two IP addresses are equal. | ||
| * | ||
| * <p>Signature: | ||
| * | ||
| * <ul> | ||
| * <li>(STRING, STRING) -> BOOLEAN | ||
| * <li>(IP, STRING) -> BOOLEAN | ||
|
ishaoxy marked this conversation as resolved.
Outdated
|
||
| * <li>(STRING, IP) -> BOOLEAN | ||
| * <li>(IP, IP) -> BOOLEAN | ||
| * </ul> | ||
| */ | ||
| public class EqualsIpFunction extends ImplementorUDF { | ||
|
ishaoxy marked this conversation as resolved.
Outdated
|
||
| public EqualsIpFunction() { | ||
| super(new EqualsImplementor(), NullPolicy.ANY); | ||
| } | ||
|
|
||
| @Override | ||
| public SqlReturnTypeInference getReturnTypeInference() { | ||
| return ReturnTypes.BOOLEAN_FORCE_NULLABLE; | ||
| } | ||
|
|
||
| @Override | ||
| public UDFOperandMetadata getOperandMetadata() { | ||
| return UDFOperandMetadata.wrap( | ||
| (CompositeOperandTypeChecker) | ||
| OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING)) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL))); | ||
| } | ||
|
|
||
| public static class EqualsImplementor implements NotNullImplementor { | ||
| @Override | ||
| public Expression implement( | ||
| RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { | ||
| return Expressions.call(EqualsImplementor.class, "Equals", translatedOperands); | ||
| } | ||
|
|
||
| public static boolean Equals(String ip1, String ip2) { | ||
| try { | ||
| IPAddress ipAddress1 = IPUtils.toAddress(ip1); | ||
| IPAddress ipAddress2 = IPUtils.toAddress(ip2); | ||
| return IPUtils.compare(ipAddress1, ipAddress2) == 0; | ||
| } catch (SemanticCheckException e) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| public static boolean Equals(String ip1, ExprIpValue ip2) { | ||
|
ishaoxy marked this conversation as resolved.
Outdated
|
||
| String ipAddress2 = ip2.value(); | ||
| return Equals(ip1, ipAddress2); | ||
| } | ||
|
|
||
| public static boolean Equals(ExprIpValue ip1, String ip2) { | ||
| String ipAddress1 = ip1.value(); | ||
| return Equals(ipAddress1, ip2); | ||
| } | ||
|
|
||
| public static boolean Equals(ExprIpValue ip1, ExprIpValue ip2) { | ||
| String ipAddress1 = ip1.value(); | ||
| String ipAddress2 = ip2.value(); | ||
| return Equals(ipAddress1, ipAddress2); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.expression.function.udf.ip; | ||
|
|
||
| import inet.ipaddr.IPAddress; | ||
| import java.util.List; | ||
| import org.apache.calcite.adapter.enumerable.NotNullImplementor; | ||
| import org.apache.calcite.adapter.enumerable.NullPolicy; | ||
| import org.apache.calcite.adapter.enumerable.RexToLixTranslator; | ||
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.*; | ||
| import org.opensearch.sql.data.model.ExprIpValue; | ||
| import org.opensearch.sql.exception.SemanticCheckException; | ||
| import org.opensearch.sql.expression.function.ImplementorUDF; | ||
| import org.opensearch.sql.expression.function.UDFOperandMetadata; | ||
| import org.opensearch.sql.utils.IPUtils; | ||
|
|
||
| /** | ||
| * {@code Greater(ip1, ip2)} checks if ip1 is greater than ip2. | ||
| * | ||
| * <p>Signature: | ||
| * | ||
| * <ul> | ||
| * <li>(STRING, STRING) -> BOOLEAN | ||
| * <li>(IP, STRING) -> BOOLEAN | ||
| * <li>(STRING, IP) -> BOOLEAN | ||
| * <li>(IP, IP) -> BOOLEAN | ||
| * </ul> | ||
| */ | ||
| public class GreaterIpFunction extends ImplementorUDF { | ||
| public GreaterIpFunction() { | ||
| super(new GreaterImplementor(), NullPolicy.ANY); | ||
| } | ||
|
|
||
| @Override | ||
| public SqlReturnTypeInference getReturnTypeInference() { | ||
| return ReturnTypes.BOOLEAN_FORCE_NULLABLE; | ||
| } | ||
|
|
||
| @Override | ||
| public UDFOperandMetadata getOperandMetadata() { | ||
| return UDFOperandMetadata.wrap( | ||
| (CompositeOperandTypeChecker) | ||
| OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING)) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL))); | ||
| } | ||
|
|
||
| public static class GreaterImplementor implements NotNullImplementor { | ||
| @Override | ||
| public Expression implement( | ||
| RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { | ||
| return Expressions.call(GreaterImplementor.class, "Greater", translatedOperands); | ||
| } | ||
|
|
||
| public static boolean Greater(String ip1, String ip2) { | ||
| try { | ||
| IPAddress ipAddress1 = IPUtils.toAddress(ip1); | ||
| IPAddress ipAddress2 = IPUtils.toAddress(ip2); | ||
| return IPUtils.compare(ipAddress1, ipAddress2) > 0; | ||
| } catch (SemanticCheckException e) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| public static boolean Greater(String ip1, ExprIpValue ip2) { | ||
| String ipAddress2 = ip2.value(); | ||
| return Greater(ip1, ipAddress2); | ||
| } | ||
|
|
||
| public static boolean Greater(ExprIpValue ip1, String ip2) { | ||
| String ipAddress1 = ip1.value(); | ||
| return Greater(ipAddress1, ip2); | ||
| } | ||
|
|
||
| public static boolean Greater(ExprIpValue ip1, ExprIpValue ip2) { | ||
| String ipAddress1 = ip1.value(); | ||
| String ipAddress2 = ip2.value(); | ||
| return Greater(ipAddress1, ipAddress2); | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.expression.function.udf.ip; | ||
|
|
||
| import inet.ipaddr.IPAddress; | ||
| import java.util.List; | ||
| import org.apache.calcite.adapter.enumerable.NotNullImplementor; | ||
| import org.apache.calcite.adapter.enumerable.NullPolicy; | ||
| import org.apache.calcite.adapter.enumerable.RexToLixTranslator; | ||
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.*; | ||
| import org.opensearch.sql.data.model.ExprIpValue; | ||
| import org.opensearch.sql.exception.SemanticCheckException; | ||
| import org.opensearch.sql.expression.function.ImplementorUDF; | ||
| import org.opensearch.sql.expression.function.UDFOperandMetadata; | ||
| import org.opensearch.sql.utils.IPUtils; | ||
|
|
||
| /** | ||
| * {@code Gte(ip1, ip2)} checks if ip1 is greater than or equals ip2. | ||
| * | ||
| * <p>Signature: | ||
| * | ||
| * <ul> | ||
| * <li>(STRING, STRING) -> BOOLEAN | ||
| * <li>(IP, STRING) -> BOOLEAN | ||
| * <li>(STRING, IP) -> BOOLEAN | ||
| * <li>(IP, IP) -> BOOLEAN | ||
| * </ul> | ||
| */ | ||
| public class GteIpFunction extends ImplementorUDF { | ||
| public GteIpFunction() { | ||
| super(new GteImplementor(), NullPolicy.ANY); | ||
| } | ||
|
|
||
| @Override | ||
| public SqlReturnTypeInference getReturnTypeInference() { | ||
| return ReturnTypes.BOOLEAN_FORCE_NULLABLE; | ||
| } | ||
|
|
||
| @Override | ||
| public UDFOperandMetadata getOperandMetadata() { | ||
| return UDFOperandMetadata.wrap( | ||
| (CompositeOperandTypeChecker) | ||
| OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING)) | ||
| .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL))); | ||
| } | ||
|
|
||
| public static class GteImplementor implements NotNullImplementor { | ||
| @Override | ||
| public Expression implement( | ||
| RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { | ||
| return Expressions.call(GteImplementor.class, "Gte", translatedOperands); | ||
| } | ||
|
|
||
| public static boolean Gte(String ip1, String ip2) { | ||
| try { | ||
| IPAddress ipAddress1 = IPUtils.toAddress(ip1); | ||
| IPAddress ipAddress2 = IPUtils.toAddress(ip2); | ||
| return IPUtils.compare(ipAddress1, ipAddress2) >= 0; | ||
| } catch (SemanticCheckException e) { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| public static boolean Gte(String ip1, ExprIpValue ip2) { | ||
| String ipAddress2 = ip2.value(); | ||
| return Gte(ip1, ipAddress2); | ||
| } | ||
|
|
||
| public static boolean Gte(ExprIpValue ip1, String ip2) { | ||
| String ipAddress1 = ip1.value(); | ||
| return Gte(ipAddress1, ip2); | ||
| } | ||
|
|
||
| public static boolean Gte(ExprIpValue ip1, ExprIpValue ip2) { | ||
| String ipAddress1 = ip1.value(); | ||
| String ipAddress2 = ip2.value(); | ||
| return Gte(ipAddress1, ipAddress2); | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Questions, Do we need to support IP as UDT in PPL engine?
In OpenSearch PPL, IP handling depends on index field type. CIDR-based filtering should use ip_range queries for ip fields, script pushdown / in-memory processing for keyword, text, and runtime string fields.