Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ public static SqlTypeName convertRelDataTypeToSqlTypeName(RelDataType type) {
case EXPR_DATE -> SqlTypeName.DATE;
case EXPR_TIME -> SqlTypeName.TIME;
case EXPR_TIMESTAMP -> SqlTypeName.TIMESTAMP;
case EXPR_IP -> SqlTypeName.VARCHAR;
// EXPR_IP is mapped to SqlTypeName.NULL since there is no
// corresponding SqlTypeName in Calcite. This is a workaround to allow
// type checking for IP types in UDFs.
case EXPR_IP -> SqlTypeName.NULL;
case EXPR_BINARY -> SqlTypeName.VARBINARY;
default -> type.getSqlTypeName();
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
import org.opensearch.sql.expression.function.udf.datetime.WeekFunction;
import org.opensearch.sql.expression.function.udf.datetime.WeekdayFunction;
import org.opensearch.sql.expression.function.udf.datetime.YearweekFunction;
import org.opensearch.sql.expression.function.udf.ip.CidrMatchFunction;
import org.opensearch.sql.expression.function.udf.ip.*;
import org.opensearch.sql.expression.function.udf.math.CRC32Function;
import org.opensearch.sql.expression.function.udf.math.ConvFunction;
import org.opensearch.sql.expression.function.udf.math.DivideFunction;
Expand Down Expand Up @@ -102,6 +102,14 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2");
public static final SqlOperator CIDRMATCH = new CidrMatchFunction().toUDF("CIDRMATCH");

// IP comparison functions
public static final SqlOperator NOT_EQUALS_IP = new NotEqualsIpFunction().toUDF("NOT_EQUALS_IP");
public static final SqlOperator EQUALS_IP = new EqualsIpFunction().toUDF("EQUALS_IP");
public static final SqlOperator GREATER_IP = new GreaterIpFunction().toUDF("GREATER_IP");
public static final SqlOperator GTE_IP = new GteIpFunction().toUDF("GTE_IP");
public static final SqlOperator LESS_IP = new LessIpFunction().toUDF("LESS_IP");
public static final SqlOperator LTE_IP = new LteIpFunction().toUDF("LTE_IP");

// Condition function
public static final SqlOperator EARLIEST = new EarliestFunction().toUDF("EARLIEST");
public static final SqlOperator LATEST = new LatestFunction().toUDF("LATEST");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,14 @@ public PPLTypeChecker getTypeChecker() {
}

void populate() {
// Register operators for IP comparison
registerOperator(NOTEQUAL, PPLBuiltinOperators.NOT_EQUALS_IP);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: do we need to support IP as a UDT in the PPL engine?

In OpenSearch PPL, IP handling depends on the index field type. CIDR-based filtering should use ip_range queries for ip fields, and script pushdown or in-memory processing for keyword, text, and runtime string fields.

Field Type Use Case Expectation
IP Field search index=log ip="192.168.0.0/16" Rewrite as term query
  search index | where cidrmatch("192.168.0.0/16", ip) Rewrite as term query
Keyword Field search index=log ip="192.168.0.0/16" Rewrite as term query, exact keyword match
  search index | where cidrmatch("192.168.0.0/16", ip) Script pushdown — ip field is a string, not rewrite as term query.
Text Field search index=log ip="192.168.0.0/16" Rewrite as query_string query, full text search
  search index | where cidrmatch("192.168.0.0/16", ip) Script pushdown — ip field is a string, not rewrite as term query.
Runtime Field search index=log | parse ip=regex(...)| where ip="192.168.0.0/16" Script pushdown — ip field is a string, it is a string comparison query
  search index=log | parse ip=regex(...)| where cidrmatch("192.168.0.0/16", ip) Script pushdown — ip field is a string, not rewrite as term query.

registerOperator(EQUAL, PPLBuiltinOperators.EQUALS_IP);
registerOperator(GREATER, PPLBuiltinOperators.GREATER_IP);
registerOperator(GTE, PPLBuiltinOperators.GTE_IP);
registerOperator(LESS, PPLBuiltinOperators.LESS_IP);
registerOperator(LTE, PPLBuiltinOperators.LTE_IP);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`SqlStdOperatorTable.EQUALS` is a very basic standard operator that is widely used inside Calcite. I am worried that this may introduce potential bugs and performance regressions for pushdown.

Copy link
Copy Markdown
Collaborator

@yuancu yuancu Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But Calcite's built-in operators like SqlStdOperatorTable.EQUALS do not handle our UDTs such as IP. These new operators only take effect for IP comparison, controlled via type checkers. Comparisons between other types will still fall through to Calcite's built-in comparison operators.

There were two solutions:

  • one is to convert the IP UDT to a type that is comparable by Calcite's comparators.
  • another is to add new operators exclusively for IP comparison.

We opted for the latter.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order to keep the comparison logic the same as v2, I chose to add these specific operator udfs for IP comparison instead of converting it to string type that calcite can compare.

Copy link
Copy Markdown
Member

@LantaoJin LantaoJin Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These new operators are only effective to IP comparison, controlled via type checkers. Comparison between other types will still falls to Calcite's built-in comparison operators.

Can we move the above logic into a dedicated method such as registerOverrideOperator? It confused me quite a bit.

Copy link
Copy Markdown
Member

@LantaoJin LantaoJin Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or modify to

registerOperator(EQUAL, check(PPLBuiltinOperators.EQUALS_IP,SqlStdOperatorTable.EQUALS));

and leverage check() to manage the specific register logic via typeChecker.

check() can name to case() or set() or queue()...

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or modify to

registerOperator(EQUAL, check(PPLBuiltinOperators.EQUALS_IP,SqlStdOperatorTable.EQUALS));

and leverage check() to manage the specific register logic via typeChecker.

check() can name to case() or set() or queue()...

Modified to improve code readability.


// Register std operator
registerOperator(AND, SqlStdOperatorTable.AND);
registerOperator(OR, SqlStdOperatorTable.OR);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ private static List<ExprType> getExprTypes(SqlTypeFamily family) {
OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER));
case ANY, IGNORE -> List.of(
OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.ANY));
// We borrow SqlTypeFamily.NULL to represent EXPR_IP. This is a workaround
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we stop using CompositeOperandTypeChecker and create a new TypeChecker ourselves, maybe we can avoid using SqlTypeFamily.NULL to represent EXPR_IP and check operands at the RelDataType level.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@qianheng-aws How about this implementation ishaoxy#1
It doesn't use NULL for IP, but created two other classes for IP type checking.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It just occurred to me that comparison is not the only operation that checks the IP type. CIDR_MATCH also has to validate IP types. If we make a specific IP type checker for the comparison operators, we may also have to create one for each of the other functions, such as cidrmatch and geoip.

// since there is no corresponding IP type family in Calcite.
case NULL -> List.of(
OpenSearchTypeFactory.TYPE_FACTORY.createUDT(OpenSearchTypeFactory.ExprUDT.EXPR_IP));
default -> {
RelDataType type = family.getDefaultConcreteType(OpenSearchTypeFactory.TYPE_FACTORY);
if (type == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.*;
Comment thread
ishaoxy marked this conversation as resolved.
Outdated
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
Expand Down Expand Up @@ -44,9 +42,12 @@ public SqlReturnTypeInference getReturnTypeInference() {

@Override
public UDFOperandMetadata getOperandMetadata() {
// EXPR_IP is mapped to SqlTypeFamily.VARCHAR in
// EXPR_IP is mapped to SqlTypeFamily.NULL in
// UserDefinedFunctionUtils.convertRelDataTypeToSqlTypeName
return UDFOperandMetadata.wrap(OperandTypes.STRING_STRING);
return UDFOperandMetadata.wrap(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should use a new PPLTypeChecker here and override its checkOperandTypes method to accept only string or ip.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Handled.

(CompositeOperandTypeChecker)
OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING)
.or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING)));
}

public static class CidrMatchImplementor implements NotNullImplementor {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.ip;

import inet.ipaddr.IPAddress;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;
import org.opensearch.sql.utils.IPUtils;

/**
* {@code Equals(ip1, ip2)} checks if two IP addresses are equal.
*
* <p>Signature:
*
* <ul>
* <li>(STRING, STRING) -> BOOLEAN
* <li>(IP, STRING) -> BOOLEAN
Comment thread
ishaoxy marked this conversation as resolved.
Outdated
* <li>(STRING, IP) -> BOOLEAN
* <li>(IP, IP) -> BOOLEAN
* </ul>
*/
public class EqualsIpFunction extends ImplementorUDF {
Comment thread
ishaoxy marked this conversation as resolved.
Outdated
public EqualsIpFunction() {
super(new EqualsImplementor(), NullPolicy.ANY);
}

@Override
public SqlReturnTypeInference getReturnTypeInference() {
return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
}

@Override
public UDFOperandMetadata getOperandMetadata() {
return UDFOperandMetadata.wrap(
(CompositeOperandTypeChecker)
OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL)
.or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))
.or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL)));
}

public static class EqualsImplementor implements NotNullImplementor {
@Override
public Expression implement(
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
return Expressions.call(EqualsImplementor.class, "Equals", translatedOperands);
}

public static boolean Equals(String ip1, String ip2) {
try {
IPAddress ipAddress1 = IPUtils.toAddress(ip1);
IPAddress ipAddress2 = IPUtils.toAddress(ip2);
return IPUtils.compare(ipAddress1, ipAddress2) == 0;
} catch (SemanticCheckException e) {
return false;
}
}

public static boolean Equals(String ip1, ExprIpValue ip2) {
Comment thread
ishaoxy marked this conversation as resolved.
Outdated
String ipAddress2 = ip2.value();
return Equals(ip1, ipAddress2);
}

public static boolean Equals(ExprIpValue ip1, String ip2) {
String ipAddress1 = ip1.value();
return Equals(ipAddress1, ip2);
}

public static boolean Equals(ExprIpValue ip1, ExprIpValue ip2) {
String ipAddress1 = ip1.value();
String ipAddress2 = ip2.value();
return Equals(ipAddress1, ipAddress2);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.ip;

import inet.ipaddr.IPAddress;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;
import org.opensearch.sql.utils.IPUtils;

/**
 * {@code Greater(ip1, ip2)} checks if ip1 is greater than ip2.
 *
 * <p>Signatures accepted by {@link #getOperandMetadata()} (at least one operand must be an IP):
 *
 * <ul>
 *   <li>(IP, STRING) -> BOOLEAN
 *   <li>(STRING, IP) -> BOOLEAN
 *   <li>(IP, IP) -> BOOLEAN
 * </ul>
 *
 * <p>A (STRING, STRING) call is not accepted by the operand type checker of this function, even
 * though the implementor exposes a {@code (String, String)} overload that the other overloads
 * delegate to.
 */
public class GreaterIpFunction extends ImplementorUDF {
  /** Creates the UDF backed by {@link GreaterImplementor}, using {@link NullPolicy#ANY}. */
  public GreaterIpFunction() {
    super(new GreaterImplementor(), NullPolicy.ANY);
  }

  /**
   * @return {@link ReturnTypes#BOOLEAN_FORCE_NULLABLE}
   */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
  }

  /**
   * Declares the operand combinations this function accepts.
   *
   * <p>{@link SqlTypeFamily#NULL} is used here as the stand-in for the IP user-defined type, so
   * the three alternatives below read as (STRING, IP), (IP, STRING) and (IP, IP).
   *
   * @return operand metadata wrapping the composite family checker
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return UDFOperandMetadata.wrap(
        (CompositeOperandTypeChecker)
            OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL)
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL)));
  }

  /** Generates a call to the matching static {@code Greater} overload. */
  public static class GreaterImplementor implements NotNullImplementor {
    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // The method name is looked up reflectively; it must match the static overloads below.
      return Expressions.call(GreaterImplementor.class, "Greater", translatedOperands);
    }

    /**
     * Compares two IP addresses given in textual form.
     *
     * @param ip1 left operand
     * @param ip2 right operand
     * @return {@code true} if {@link IPUtils#compare} orders {@code ip1} after {@code ip2};
     *     {@code false} otherwise, including when a {@link SemanticCheckException} is raised
     *     while parsing or comparing
     */
    public static boolean Greater(String ip1, String ip2) {
      try {
        IPAddress left = IPUtils.toAddress(ip1);
        IPAddress right = IPUtils.toAddress(ip2);
        return IPUtils.compare(left, right) > 0;
      } catch (SemanticCheckException ignored) {
        // Deliberate best effort: an operand that fails the semantic check (presumably a
        // malformed address) makes the predicate false instead of failing the whole query.
        return false;
      }
    }

    /** (STRING, IP) overload; delegates to {@link #Greater(String, String)}. */
    public static boolean Greater(String ip1, ExprIpValue ip2) {
      return Greater(ip1, ip2.value());
    }

    /** (IP, STRING) overload; delegates to {@link #Greater(String, String)}. */
    public static boolean Greater(ExprIpValue ip1, String ip2) {
      return Greater(ip1.value(), ip2);
    }

    /** (IP, IP) overload; delegates to {@link #Greater(String, String)}. */
    public static boolean Greater(ExprIpValue ip1, ExprIpValue ip2) {
      return Greater(ip1.value(), ip2.value());
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf.ip;

import inet.ipaddr.IPAddress;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.*;
import org.opensearch.sql.data.model.ExprIpValue;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.ImplementorUDF;
import org.opensearch.sql.expression.function.UDFOperandMetadata;
import org.opensearch.sql.utils.IPUtils;

/**
 * {@code Gte(ip1, ip2)} checks if ip1 is greater than or equal to ip2.
 *
 * <p>Signatures accepted by {@link #getOperandMetadata()} (at least one operand must be an IP):
 *
 * <ul>
 *   <li>(IP, STRING) -> BOOLEAN
 *   <li>(STRING, IP) -> BOOLEAN
 *   <li>(IP, IP) -> BOOLEAN
 * </ul>
 *
 * <p>A (STRING, STRING) call is not accepted by the operand type checker of this function, even
 * though the implementor exposes a {@code (String, String)} overload that the other overloads
 * delegate to.
 */
public class GteIpFunction extends ImplementorUDF {
  /** Creates the UDF backed by {@link GteImplementor}, using {@link NullPolicy#ANY}. */
  public GteIpFunction() {
    super(new GteImplementor(), NullPolicy.ANY);
  }

  /**
   * @return {@link ReturnTypes#BOOLEAN_FORCE_NULLABLE}
   */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
  }

  /**
   * Declares the operand combinations this function accepts.
   *
   * <p>{@link SqlTypeFamily#NULL} is used here as the stand-in for the IP user-defined type, so
   * the three alternatives below read as (STRING, IP), (IP, STRING) and (IP, IP).
   *
   * @return operand metadata wrapping the composite family checker
   */
  @Override
  public UDFOperandMetadata getOperandMetadata() {
    return UDFOperandMetadata.wrap(
        (CompositeOperandTypeChecker)
            OperandTypes.family(SqlTypeFamily.STRING, SqlTypeFamily.NULL)
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.STRING))
                .or(OperandTypes.family(SqlTypeFamily.NULL, SqlTypeFamily.NULL)));
  }

  /** Generates a call to the matching static {@code Gte} overload. */
  public static class GteImplementor implements NotNullImplementor {
    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // The method name is looked up reflectively; it must match the static overloads below.
      return Expressions.call(GteImplementor.class, "Gte", translatedOperands);
    }

    /**
     * Compares two IP addresses given in textual form.
     *
     * @param ip1 left operand
     * @param ip2 right operand
     * @return {@code true} if {@link IPUtils#compare} orders {@code ip1} at or after {@code
     *     ip2}; {@code false} otherwise, including when a {@link SemanticCheckException} is
     *     raised while parsing or comparing
     */
    public static boolean Gte(String ip1, String ip2) {
      try {
        IPAddress left = IPUtils.toAddress(ip1);
        IPAddress right = IPUtils.toAddress(ip2);
        return IPUtils.compare(left, right) >= 0;
      } catch (SemanticCheckException ignored) {
        // Deliberate best effort: an operand that fails the semantic check (presumably a
        // malformed address) makes the predicate false instead of failing the whole query.
        return false;
      }
    }

    /** (STRING, IP) overload; delegates to {@link #Gte(String, String)}. */
    public static boolean Gte(String ip1, ExprIpValue ip2) {
      return Gte(ip1, ip2.value());
    }

    /** (IP, STRING) overload; delegates to {@link #Gte(String, String)}. */
    public static boolean Gte(ExprIpValue ip1, String ip2) {
      return Gte(ip1.value(), ip2);
    }

    /** (IP, IP) overload; delegates to {@link #Gte(String, String)}. */
    public static boolean Gte(ExprIpValue ip1, ExprIpValue ip2) {
      return Gte(ip1.value(), ip2.value());
    }
  }
}
Loading
Loading