-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-26653: Wrong results when (map) joining multiple tables on parti… #6165
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -214,10 +214,7 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, | |
| batch, batchIndex); | ||
| } | ||
|
|
||
| if (smallTableValueVectorDeserializeRow != null) { | ||
| doSmallTableValueDeserializeRow(batch, batchIndex, | ||
| byteSegmentRef, hashMapResult); | ||
| } | ||
| updateBatchWithSmallTableValue(batch, hashMapResult, batchIndex, byteSegmentRef); | ||
|
|
||
| // Use the big table row as output. | ||
| batch.selected[numSel++] = batchIndex; | ||
|
|
@@ -226,6 +223,29 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, | |
| return numSel; | ||
| } | ||
|
|
||
| private void updateBatchWithSmallTableValue( | ||
| VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult, int batchIndex, ByteSegmentRef byteSegmentRef) | ||
| throws HiveException { | ||
|
|
||
| // Check if the small table value is empty. | ||
| boolean isSmallTableValueEmpty = byteSegmentRef.getLength() == 0; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The fact that we need to check the actual data in order to decide how to evaluate the join (or rather, the creation of the resulting row) is somewhat suspicious and a bit brittle. Ideally, the compiler should be able to determine exactly how the operator should behave via the query plan. Can we exploit (or add) information in the query plan in order to drive the copy decision below? |
||
|
|
||
| // INNER join where only the small table key is projected. | ||
| // The hash map value for that key is empty, and deserializing it would produce NULLs. | ||
| // Instead, copy the big table key into the small table value position. | ||
| // This preserves the joining key in the output without introducing NULLs. | ||
| // (Applied only when bigTableKeyToSmallTableValueCopy is available.) | ||
| if (isSmallTableValueEmpty && bigTableKeyToSmallTableValueCopy != null) { | ||
| bigTableKeyToSmallTableValueCopy.copyByValue( | ||
| batch, batchIndex, // from big table key area | ||
| batch, batchIndex // to small table value area | ||
| ); | ||
| } else if (smallTableValueVectorDeserializeRow != null) { | ||
| doSmallTableValueDeserializeRow(batch, batchIndex, | ||
| byteSegmentRef, hashMapResult); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Generate results for a N x M cross product. | ||
| * | ||
|
|
@@ -295,11 +315,7 @@ protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, | |
| overflowBatch, overflowBatch.size); | ||
| } | ||
|
|
||
| if (smallTableValueVectorDeserializeRow != null) { | ||
|
|
||
| doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, | ||
| byteSegmentRef, hashMapResult); | ||
| } | ||
| updateBatchWithSmallTableValue(overflowBatch, hashMapResult, overflowBatch.size, byteSegmentRef); | ||
|
|
||
| overflowBatch.size++; | ||
| if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { | ||
|
|
@@ -339,12 +355,7 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, | |
|
|
||
| // Fill up as much of the overflow batch as possible with small table values. | ||
| while (byteSegmentRef != null) { | ||
|
|
||
| if (smallTableValueVectorDeserializeRow != null) { | ||
| doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, | ||
| byteSegmentRef, hashMapResult); | ||
| } | ||
|
|
||
| updateBatchWithSmallTableValue(overflowBatch, hashMapResult, overflowBatch.size, byteSegmentRef); | ||
| overflowBatch.size++; | ||
| if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { | ||
| break; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -297,14 +297,25 @@ public static void generateVariationData(MapJoinTestData testData, | |
|
|
||
| private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescription testDesc, Random random) { | ||
|
|
||
| final ValueOption valueOption = testDesc.getSmallTableGenerationParameters().getValueOption(); | ||
| final int columnCount = testDesc.smallTableValueTypeInfos.length; | ||
| PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; | ||
| for (int i = 0; i < columnCount; i++) { | ||
| primitiveTypeInfos[i] = (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[i]; | ||
| } | ||
| Object[] smallTableValueRow = | ||
| VectorRandomRowSource.randomWritablePrimitiveRow( | ||
| columnCount, random, primitiveTypeInfos); | ||
| Object[] smallTableValueRow; | ||
|
|
||
| // Generate empty value row if specified. | ||
| if (valueOption == ValueOption.EMPTY_VALUE) { | ||
| smallTableValueRow = new Object[columnCount]; | ||
| for (int c = 0; c < columnCount; c++) { | ||
| smallTableValueRow[c] = | ||
| VectorizedBatchUtil.getPrimitiveWritable(primitiveTypeInfos[c].getPrimitiveCategory()); | ||
|
Comment on lines
+312
to
+313
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Which real use-case is this code trying to simulate? Is this equivalent to having […]? From a quick look, it seems that we are using special values (e.g., […]). |
||
| } | ||
| } else { | ||
| smallTableValueRow = VectorRandomRowSource.randomWritablePrimitiveRow( | ||
| columnCount, random, primitiveTypeInfos); | ||
| } | ||
| for (int c = 0; c < smallTableValueRow.length; c++) { | ||
| smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableValueObjectInspectors[c]).copyObject(smallTableValueRow[c]); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
|
|
||
| import org.apache.hadoop.hive.conf.HiveConf; | ||
| import org.apache.hadoop.hive.ql.exec.util.DescriptionTest; | ||
| import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; | ||
| import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; | ||
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | ||
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; | ||
|
|
@@ -44,7 +45,8 @@ public static class SmallTableGenerationParameters { | |
| public static enum ValueOption { | ||
| NO_RESTRICTION, | ||
| ONLY_ONE, | ||
| NO_REGULAR_SMALL_KEYS | ||
| NO_REGULAR_SMALL_KEYS, | ||
| EMPTY_VALUE, // Generate empty value entries. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. By going over the code, I get the impression that the […] |
||
| } | ||
|
|
||
| private ValueOption valueOption; | ||
|
|
@@ -135,6 +137,8 @@ public int getNoMatchKeyOutOfAThousand() { | |
| public ObjectInspector[] outputObjectInspectors; | ||
|
|
||
| final MapJoinPlanVariation mapJoinPlanVariation; | ||
| public MapJoinTestImplementation [] implementations; | ||
| public boolean shouldCheckForExpectedOutputAndFail = false; | ||
|
|
||
| public MapJoinTestDescription ( | ||
| HiveConf hiveConf, | ||
|
|
@@ -183,6 +187,8 @@ public MapJoinTestDescription ( | |
| this.smallTableGenerationParameters = smallTableGenerationParameters; | ||
|
|
||
| this.mapJoinPlanVariation = mapJoinPlanVariation; | ||
|
|
||
| this.implementations = MapJoinTestImplementation.values(); | ||
|
|
||
| computeDerived(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,8 +43,10 @@ | |
| import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; | ||
| import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; | ||
| import org.jetbrains.annotations.NotNull; | ||
| import org.junit.Test; | ||
| import org.junit.Ignore; | ||
|
|
||
|
|
@@ -53,6 +55,10 @@ | |
|
|
||
| import org.junit.Assert; | ||
|
|
||
| import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.intTypeInfo; | ||
| import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.longTypeInfo; | ||
| import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo; | ||
|
|
||
| public class TestMapJoinOperator { | ||
|
|
||
| private boolean addLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { | ||
|
|
@@ -1343,6 +1349,93 @@ public void testString2() throws Exception { | |
| } while (!hiveConfVariationsDone); | ||
| } | ||
|
|
||
| /** | ||
| * Test case for INNER vector map join with only small table key projection - string type. | ||
| * | ||
| * @throws Exception Exception | ||
| */ | ||
| @Test | ||
| public void testSmallTableKeyOnlyProjectionWithEmptyValueString() throws Exception { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding tests in this class is useful, but it may not be the best option in every situation. These tests depend on random generation of input/output, and they are good for covering the general behavior of the join operators; but for edge cases and very specific bugs, having fixed input & output and a fixed join configuration would be much easier to reason about. For showcasing the bug in this PR (if there is one), it would really help to have a dedicated test case, possibly in another class, with well-defined and minimal input/output and join settings. Then we can discuss whether we also need these randomized tests. The bug implies a problem in a binary join operator, so we should be able to demonstrate the issue by correctly picking the schema/data for the left and right sides of the join, having a few rows on each side. |
||
| long seed = 26653L; | ||
| int rowCount = 100; | ||
|
|
||
| MapJoinTestDescription testDesc = getTestDescriptionForSmallTableEmptyValue(stringTypeInfo); | ||
|
|
||
| // Create the test data. | ||
| MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed); | ||
|
|
||
| executeTest(testDesc, testData, "testSmallTableKeyOnlyProjectionWithEmptyValueString"); | ||
| } | ||
|
|
||
| /** | ||
| * Test case for INNER vector map join with only small table key projection - int type. | ||
| * | ||
| * @throws Exception Exception | ||
| */ | ||
| @Test | ||
| public void testSmallTableKeyOnlyProjectionWithEmptyValueInt() throws Exception { | ||
| long seed = 26653L; | ||
| int rowCount = 100; | ||
|
|
||
| MapJoinTestDescription testDesc = getTestDescriptionForSmallTableEmptyValue(intTypeInfo); | ||
|
|
||
| // Create the test data. | ||
| MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed); | ||
|
|
||
| executeTest(testDesc, testData, "testSmallTableKeyOnlyProjectionWithEmptyValueInt"); | ||
| } | ||
|
|
||
| /** | ||
| * Test case for INNER vector map join with only small table key projection - long type. | ||
| * | ||
| * @throws Exception Exception | ||
| */ | ||
| @Test | ||
| public void testSmallTableKeyOnlyProjectionWithEmptyValueLong() throws Exception { | ||
| long seed = 26653L; | ||
| int rowCount = 100; | ||
|
|
||
| MapJoinTestDescription testDesc = getTestDescriptionForSmallTableEmptyValue(longTypeInfo); | ||
| // Increase the probability of a big table key being present in the small table. | ||
| // 200 out of 1000 keys in small table. | ||
| testDesc.smallTableGenerationParameters.setKeyOutOfAThousand(200); | ||
|
|
||
| // Create the test data. | ||
| MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed); | ||
|
|
||
| executeTest(testDesc, testData, "testSmallTableKeyOnlyProjectionWithEmptyValueLong"); | ||
| } | ||
|
|
||
| private @NotNull MapJoinTestDescription getTestDescriptionForSmallTableEmptyValue(PrimitiveTypeInfo typeInfo) { | ||
| // Define BigTable | ||
| TypeInfo[] bigTableTypeInfos = new TypeInfo[] {typeInfo}; | ||
| int[] bigTableKeyColumnNums = new int[] { 0 }; // key is column 0 | ||
|
|
||
| // Define SmallTable | ||
| TypeInfo[] smallTableValueTypeInfos = new TypeInfo[] {typeInfo}; | ||
| int[] smallTableRetainKeyColumnNums = new int[] { 0 }; // retain key column 0 | ||
|
|
||
| // Generate empty values for small table | ||
| SmallTableGenerationParameters smallTableGenParams = new SmallTableGenerationParameters(); | ||
| smallTableGenParams.setValueOption(ValueOption.EMPTY_VALUE); | ||
|
|
||
| // Create an INNER MapJoin test description | ||
| MapJoinTestDescription mapJoinTestDescription = new MapJoinTestDescription( | ||
| getHiveConf(), | ||
| VectorMapJoinVariation.INNER, | ||
| bigTableTypeInfos, | ||
| bigTableKeyColumnNums, | ||
| smallTableValueTypeInfos, | ||
| smallTableRetainKeyColumnNums, | ||
| smallTableGenParams, | ||
| MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); | ||
| mapJoinTestDescription.implementations = | ||
| new MapJoinTestImplementation[] {MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED}; | ||
| mapJoinTestDescription.shouldCheckForExpectedOutputAndFail = true; | ||
|
|
||
| return mapJoinTestDescription; | ||
| } | ||
|
|
||
| public boolean doTestString2(long seed, int hiveConfVariation, | ||
| VectorMapJoinVariation vectorMapJoinVariation, | ||
| MapJoinPlanVariation mapJoinPlanVariation) throws Exception { | ||
|
|
@@ -1512,8 +1605,17 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript | |
| Object[] valueRow = valueList.get(v).getRow(); | ||
| final int smallTableRetainValueColumnNumsLength = | ||
| testDesc.smallTableRetainValueColumnNums.length; | ||
|
|
||
| // When EMPTY_VALUE is specified, the small table value columns are not | ||
| // actually stored in the small table HashMap. So we have to simulate that here. | ||
| // The expected output should have values from the big table key columns. | ||
| final boolean isEmptyValue = | ||
| testDesc.smallTableGenerationParameters.getValueOption() == ValueOption.EMPTY_VALUE && | ||
| testDesc.smallTableRetainValueColumnNums.length > 0 && | ||
| testDesc.smallTableRetainValueColumnNums.length == testDesc.bigTableKeyColumnNums.length; | ||
|
Comment on lines
+1612
to
+1615
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This simulation is problematic because it makes the solution and the test code somewhat identical. We're implementing the same copy logic in two places (prod & test), so the tests will trivially pass as things stand right now and immediately fail if the implementation changes in the future. |
||
|
|
||
| for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { | ||
| outputObjects[outputColumnNum++] = | ||
| outputObjects[outputColumnNum++] = isEmptyValue ? bigTableKeyObjects[o] : | ||
| valueRow[testDesc.smallTableRetainValueColumnNums[o]]; | ||
| } | ||
|
|
||
|
|
@@ -1773,7 +1875,7 @@ private void doExecuteTest(MapJoinTestDescription testDesc, MapJoinTestData test | |
| " totalValueCount " + expectedTestRowMultiSet.getTotalValueCount()); | ||
|
|
||
| // Execute all implementation variations. | ||
| for (MapJoinTestImplementation mapJoinImplementation : MapJoinTestImplementation.values()) { | ||
| for (MapJoinTestImplementation mapJoinImplementation : testDesc.implementations) { | ||
|
|
||
| if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && | ||
| mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { | ||
|
|
@@ -1981,6 +2083,11 @@ private void executeTestImplementation( | |
| " for implementation " + mapJoinImplementation + | ||
| " variation " + testDesc.vectorMapJoinVariation + option); | ||
| expectedTestRowMultiSet.displayDifferences(outputTestRowMultiSet, "expected", "actual"); | ||
|
|
||
| if (testDesc.shouldCheckForExpectedOutputAndFail) { | ||
| Assert.fail(title + " failed for implementation " + mapJoinImplementation + | ||
| " variation " + testDesc.vectorMapJoinVariation + option); | ||
| } | ||
| } else { | ||
| System.out.println("*BENCHMARK* " + title + " verify succeeded " + | ||
| " for implementation " + mapJoinImplementation + | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand the reasoning/intuition behind this check. Why do we care about values and keys being of the same length?