-
Notifications
You must be signed in to change notification settings - Fork 2k
[kernel-spark] Implement availableNow trigger support for dsv2 streaming #5585
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1102,6 +1102,137 @@ private static Stream<Arguments> noNewDataAtLatestVersionParameters() { | |
| Arguments.of(0L, 1L, BASE_INDEX, "Latest version index=0, no new data")); | ||
| } | ||
|
|
||
| // ================================================================================================ | ||
| // Tests for availableNow parity between DSv1 and DSv2 | ||
| // ================================================================================================ | ||
|
|
||
| @ParameterizedTest | ||
| @MethodSource("availableNowParameters") | ||
| public void testAvailableNow_SequentialBatchAdvancement( | ||
| Long startVersion, | ||
| Long startIndex, | ||
| ReadLimitConfig limitConfig, | ||
| int numIterations, | ||
| String testDescription, | ||
| @TempDir File tempDir) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's really hard to understand what is going on in this test. Please add more inline comments explaining what you are doing. |
||
| String testTablePath = tempDir.getAbsolutePath(); | ||
| String testTableName = | ||
| "test_availableNow_sequential" | ||
| + Math.abs(testDescription.hashCode()) | ||
| + "_" | ||
| + System.nanoTime(); | ||
| createEmptyTestTable(testTablePath, testTableName); | ||
| insertVersions( | ||
| testTableName, | ||
| /* numVersions= */ 5, | ||
| /* rowsPerVersion= */ 10, | ||
| /* includeEmptyVersion= */ true); | ||
|
|
||
| DeltaLog deltaLog = DeltaLog.forTable(spark, new Path(testTablePath)); | ||
| String tableId = deltaLog.tableId(); | ||
|
|
||
| DeltaSourceOffset startOffset = | ||
| new DeltaSourceOffset(tableId, startVersion, startIndex, /* isInitialSnapshot= */ false); | ||
| ReadLimit readLimit = limitConfig.toReadLimit(); | ||
|
|
||
| // dsv1 | ||
| DeltaSource deltaSource = createDeltaSource(deltaLog, testTablePath); | ||
| deltaSource.prepareForTriggerAvailableNow(); | ||
| List<Offset> dsv1Offsets = | ||
| advanceOffsetSequenceDsv1(deltaSource, startOffset, numIterations, readLimit); | ||
|
|
||
| // dsv2 | ||
| Configuration hadoopConf = new Configuration(); | ||
| PathBasedSnapshotManager snapshotManager = | ||
| new PathBasedSnapshotManager(testTablePath, hadoopConf); | ||
| SparkMicroBatchStream stream = | ||
| new SparkMicroBatchStream( | ||
| snapshotManager, snapshotManager.loadLatestSnapshot(), hadoopConf); | ||
| stream.prepareForTriggerAvailableNow(); | ||
| List<Offset> dsv2Offsets = | ||
| advanceOffsetSequenceDsv2(stream, startOffset, numIterations, readLimit); | ||
|
|
||
| compareOffsetSequence(dsv1Offsets, dsv2Offsets, testDescription); | ||
| } | ||
|
|
||
| private static Stream<Arguments> availableNowParameters() { | ||
| long BASE_INDEX = DeltaSourceOffset.BASE_INDEX(); | ||
| long END_INDEX = DeltaSourceOffset.END_INDEX(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For my own knowledge: what do these two (BASE_INDEX and END_INDEX) mean? |
||
|
|
||
| return Stream.of( | ||
| // No limits | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.noLimit(), | ||
| /* numIterations= */ 3, | ||
| "NoLimits1"), | ||
| Arguments.of( | ||
| /* startVersion= */ 1L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.noLimit(), | ||
| /* numIterations= */ 3, | ||
| "NoLimits2"), | ||
| Arguments.of( | ||
| /* startVersion= */ 4L, | ||
| /* startIndex= */ END_INDEX, | ||
| ReadLimitConfig.noLimit(), | ||
| /* numIterations= */ 3, | ||
| "NoLimits3"), | ||
|
|
||
| // Max files | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxFiles(1), | ||
| /* numIterations= */ 10, | ||
| "MaxFiles1"), | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxFiles(1000), | ||
| /* numIterations= */ 3, | ||
| "MaxFiles2"), | ||
| Arguments.of( | ||
| /* startVersion= */ 1L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxFiles(2), | ||
| /* numIterations= */ 10, | ||
| "MaxFiles3"), | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxFiles(0), | ||
| /* numIterations= */ 3, | ||
| "MaxFiles4"), | ||
|
|
||
| // Max bytes | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxBytes(1), | ||
| /* numIterations= */ 100, | ||
| "MaxBytes1"), | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxBytes(1000000), // ensure larger than total file size | ||
| /* numIterations= */ 3, | ||
| "MaxBytes2"), | ||
| Arguments.of( | ||
| /* startVersion= */ 1L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxBytes(1000), | ||
| /* numIterations= */ 100, | ||
| "MaxBytes3"), | ||
| Arguments.of( | ||
| /* startVersion= */ 0L, | ||
| /* startIndex= */ BASE_INDEX, | ||
| ReadLimitConfig.maxBytes(0), | ||
| /* numIterations= */ 3, | ||
| "MaxBytes4")); | ||
| } | ||
|
|
||
| // ================================================================================================ | ||
| // Helper methods | ||
| // ================================================================================================ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add method docs explaining what each method does?