Skip to content

Commit 3d558d9

Browse files
sanjeet006py and Sanjeet Malhotra authored
PHOENIX-7659: Leverage = ANY() instead of big IN list to do huge number of point lookups in a single query (#2239)
--------- Co-authored-by: Sanjeet Malhotra <[email protected]>
1 parent 26444e0 commit 3d558d9

File tree

2 files changed

+1040
-0
lines changed

2 files changed

+1040
-0
lines changed

phoenix-core-client/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
import org.apache.phoenix.expression.OrExpression;
5252
import org.apache.phoenix.expression.RowKeyColumnExpression;
5353
import org.apache.phoenix.expression.RowValueConstructorExpression;
54+
import org.apache.phoenix.expression.function.ArrayAnyComparisonExpression;
55+
import org.apache.phoenix.expression.function.ArrayElemRefExpression;
5456
import org.apache.phoenix.expression.function.FunctionExpression.OrderPreserving;
5557
import org.apache.phoenix.expression.function.ScalarFunction;
5658
import org.apache.phoenix.expression.visitor.ExpressionVisitor;
@@ -76,6 +78,7 @@
7678
import org.apache.phoenix.schema.types.PDataType;
7779
import org.apache.phoenix.schema.types.PVarbinary;
7880
import org.apache.phoenix.schema.types.PVarchar;
81+
import org.apache.phoenix.schema.types.PhoenixArray;
7982
import org.apache.phoenix.util.ByteUtil;
8083
import org.apache.phoenix.util.ScanUtil;
8184
import org.apache.phoenix.util.SchemaUtil;
@@ -1945,6 +1948,152 @@ public KeySlots visitLeave(InListExpression node, List<KeySlots> childParts) {
19451948
return newKeyParts(childSlot, node, new ArrayList<KeyRange>(ranges));
19461949
}
19471950

1951+
/**
1952+
* If {@link ArrayAnyComparisonExpression} is of the form:
1953+
*
1954+
* <pre>
1955+
* COL = ANY(ARR)
1956+
* </pre>
1957+
*
1958+
* then we can extract the scan ranges for the COL, given COL is a PK column. This syntactical
1959+
* pattern can be used as a replacement for a IN expression. So, instead of following IN
1960+
* expression:
1961+
*
1962+
* <pre>
1963+
* COL IN (VAL1, VAL2, ... VALN)
1964+
* </pre>
1965+
*
1966+
* we can use the following ANY expression:
1967+
*
1968+
* <pre>
1969+
* try (Connection conn = DriverManager.getConnection(url)) {
1970+
* conn.createArrayOf("CHAR", new String[] {"VAL1", "VAL2", ... "VALN"});
1971+
* try (PreparedStatement stmt = conn.prepareStatement(
1972+
* "SELECT ... FROM TABLE WHERE COL = ANY(?)")) {
1973+
* stmt.setArray(1, arr);
1974+
* ResultSet rs = stmt.executeQuery();
1975+
* }
1976+
* }
1977+
* </pre>
1978+
*
1979+
* This will help in saving the query parsing time as on using IN list query parsing time
1980+
* increases with the size of IN list but in case of ANY expression it is constant. Below we
1981+
* account for cases where COL is on the LHS or RHS of the comparison expression.
1982+
* @param node {@link ArrayAnyComparisonExpression} node for which scan ranges are to
1983+
* be extracted
1984+
* @param keyExpressions {@link RowKeyColumnExpression} for the PK column for which scan ranges
1985+
* are to be extracted
1986+
* @return true if the scan ranges can be extracted, false otherwise
1987+
*/
1988+
private boolean shouldExtractKeyRangesForArrayAnyExpr(ArrayAnyComparisonExpression node,
1989+
List<Expression> keyExpressions) {
1990+
// {@link ArrayAnyComparisonExpression} has two children, and the second child is
1991+
// comparison expression
1992+
Expression childExpr = node.getChildren().get(1);
1993+
if (!(childExpr instanceof ComparisonExpression)) {
1994+
return false;
1995+
}
1996+
ComparisonExpression comparisonExpr = (ComparisonExpression) childExpr;
1997+
1998+
// Replacing IN() with =ANY() is only valid if the comparison operator is EQUAL
1999+
if (comparisonExpr.getFilterOp() != CompareOperator.EQUAL) {
2000+
return false;
2001+
}
2002+
2003+
// {@link ComparisonExpression} will have two children in this case, we need to make
2004+
// sure that one of them is a {@link RowKeyColumnExpression} and the other is a {@link
2005+
// ArrayElemRefExpression}. Further, the first child of {@link ArrayElemRefExpression}
2006+
// must be a {@link LiteralExpression}. The first child of {@link
2007+
// ArrayElemRefExpression} is same as the first child of {@link
2008+
// ArrayAnyComparisonExpression}.
2009+
Expression lhs = comparisonExpr.getChildren().get(0);
2010+
Expression rhs = comparisonExpr.getChildren().get(1);
2011+
if (lhs instanceof RowKeyColumnExpression && rhs instanceof ArrayElemRefExpression) {
2012+
ArrayElemRefExpression arrayElemRefExpr = (ArrayElemRefExpression) rhs;
2013+
if (!(arrayElemRefExpr.getChildren().get(0) instanceof LiteralExpression)) {
2014+
return false;
2015+
}
2016+
// Capture {@link RowKeyColumnExpression} for the generation of key slots.
2017+
keyExpressions.add(lhs);
2018+
2019+
} else if (lhs instanceof ArrayElemRefExpression && rhs instanceof RowKeyColumnExpression) {
2020+
ArrayElemRefExpression arrayElemRefExpr = (ArrayElemRefExpression) lhs;
2021+
if (!(arrayElemRefExpr.getChildren().get(0) instanceof LiteralExpression)) {
2022+
return false;
2023+
}
2024+
// Capture {@link RowKeyColumnExpression} for the generation of key slots.
2025+
keyExpressions.add(rhs);
2026+
} else {
2027+
return false;
2028+
}
2029+
return true;
2030+
}
2031+
2032+
@Override
2033+
public Iterator<Expression> visitEnter(ArrayAnyComparisonExpression node) {
2034+
ArrayList<Expression> keyExpressions = new ArrayList<>();
2035+
if (shouldExtractKeyRangesForArrayAnyExpr(node, keyExpressions)) {
2036+
return keyExpressions.iterator();
2037+
}
2038+
// If the scan ranges cannot be extracted, we return an empty iterator
2039+
return Collections.emptyIterator();
2040+
}
2041+
2042+
@Override
2043+
public KeySlots visitLeave(ArrayAnyComparisonExpression node, List<KeySlots> childParts) {
2044+
if (childParts == null || childParts.isEmpty()) {
2045+
return null;
2046+
}
2047+
// Doing type casting is safe here as we won't have reached here unless the expression
2048+
// tree is of the form expected by the method shouldExtractKeyRangesForArrayAnyExpr.
2049+
Expression arrayExpr = node.getChildren().get(0);
2050+
PhoenixArray arr = (PhoenixArray) ((LiteralExpression) arrayExpr).getValue();
2051+
int numElements = arr.getDimensions();
2052+
2053+
ComparisonExpression comparisonExpr = (ComparisonExpression) node.getChildren().get(1);
2054+
Expression lhsExpr = comparisonExpr.getChildren().get(0);
2055+
Expression rhsExpr = comparisonExpr.getChildren().get(1);
2056+
ArrayElemRefExpression arrayElemRefExpr;
2057+
if (lhsExpr instanceof ArrayElemRefExpression) {
2058+
arrayElemRefExpr = (ArrayElemRefExpression) lhsExpr;
2059+
} else {
2060+
arrayElemRefExpr = (ArrayElemRefExpression) rhsExpr;
2061+
}
2062+
2063+
KeySlots childSlots = childParts.get(0);
2064+
KeySlot childSlot = childSlots.getSlots().get(0);
2065+
KeyPart childPart = childSlot.getKeyPart();
2066+
PColumn column = childPart.getColumn();
2067+
2068+
List<KeyRange> keyRanges = new ArrayList<>();
2069+
try {
2070+
Expression coerceExpr = CoerceExpression.create(arrayElemRefExpr, column.getDataType(),
2071+
column.getSortOrder(), column.getMaxLength());
2072+
for (int i = 1; i <= numElements; i++) {
2073+
arrayElemRefExpr.setIndex(i);
2074+
KeyRange keyRange = childPart.getKeyRange(CompareOperator.EQUAL, coerceExpr);
2075+
if (
2076+
keyRange == null || keyRange == KeyRange.EMPTY_RANGE
2077+
|| keyRange == KeyRange.IS_NULL_RANGE
2078+
) {
2079+
// Skip null range along with empty range as null check is done via IS NULL as
2080+
// per SQL standards
2081+
continue;
2082+
}
2083+
keyRanges.add(keyRange);
2084+
}
2085+
} catch (Exception e) {
2086+
LOGGER.warn(
2087+
"Failed to wrap ArrayElemRefExpression with CoerceExpression for column: {} and type: {}",
2088+
column.getName().getString(), column.getDataType().getSqlTypeName(), e);
2089+
return super.visitLeave(node, childParts);
2090+
}
2091+
if (keyRanges.isEmpty()) {
2092+
return super.visitLeave(node, childParts);
2093+
}
2094+
return newKeyParts(childSlot, node, keyRanges);
2095+
}
2096+
19482097
@Override
19492098
public Iterator<Expression> visitEnter(IsNullExpression node) {
19502099
return Iterators.singletonIterator(node.getChildren().get(0));

0 commit comments

Comments
 (0)