-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-29699 Scan#setLimit ignored in MapReduce jobs #7432
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -257,7 +257,10 @@ public ClientSideRegionScanner getScanner() { | |
| public void initialize(InputSplit split, Configuration conf) throws IOException { | ||
| this.scan = TableMapReduceUtil.convertStringToScan(split.getScan()); | ||
| this.split = split; | ||
| this.rowLimitPerSplit = conf.getInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 0); | ||
| int confLimit = conf.getInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 0); | ||
| int scanLimit = Math.max(scan.getLimit(), 0); | ||
| this.rowLimitPerSplit = | ||
| confLimit == 0 ? scanLimit : scanLimit == 0 ? confLimit : Math.min(confLimit, scanLimit); | ||
|
Comment on lines
+260
to
+263
|
||
| TableDescriptor htd = split.htd; | ||
| RegionInfo hri = this.split.getRegionInfo(); | ||
| FileSystem fs = CommonFSUtils.getCurrentFileSystem(conf); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -304,13 +304,14 @@ public void testWithMockedMapReduceWithNoStartRowStopRow() throws Exception { | |
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testScanLimit() throws Exception { | ||
| private void testScanLimit(int confLimit, Scan scan, int expectedLimit) throws Exception { | ||
| final TableName tableName = TableName.valueOf(name.getMethodName()); | ||
| final String snapshotName = tableName + "Snapshot"; | ||
| Table table = null; | ||
| try { | ||
| UTIL.getConfiguration().setInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, 10); | ||
| if (confLimit > 0) { | ||
| UTIL.getConfiguration().setInt(SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT, confLimit); | ||
| } | ||
| if (UTIL.getAdmin().tableExists(tableName)) { | ||
| UTIL.deleteTable(tableName); | ||
| } | ||
|
|
@@ -332,15 +333,14 @@ public void testScanLimit() throws Exception { | |
|
|
||
| Job job = new Job(UTIL.getConfiguration()); | ||
| Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName); | ||
| Scan scan = new Scan(); | ||
| TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), | ||
| TestTableSnapshotInputFormat.class); | ||
|
|
||
| TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan, | ||
| RowCounter.RowCounterMapper.class, NullWritable.class, NullWritable.class, job, true, | ||
| tmpTableDir); | ||
| Assert.assertTrue(job.waitForCompletion(true)); | ||
| Assert.assertEquals(10 * regionNum, | ||
| Assert.assertEquals(expectedLimit * regionNum, | ||
junegunn marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| job.getCounters().findCounter(RowCounter.RowCounterMapper.Counters.ROWS).getValue()); | ||
| } finally { | ||
| if (table != null) { | ||
|
|
@@ -352,6 +352,26 @@ public void testScanLimit() throws Exception { | |
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testScanLimitOnConfig() throws Exception { | ||
| testScanLimit(10, new Scan(), 10); | ||
| } | ||
|
|
||
| @Test | ||
| public void testScanLimitOnScan() throws Exception { | ||
| testScanLimit(0, new Scan().setLimit(10), 10); | ||
| } | ||
|
|
||
| @Test | ||
| public void testScanLimitOnBothButScanWins() throws Exception { | ||
| testScanLimit(10, new Scan().setLimit(5), 5); | ||
| } | ||
|
|
||
| @Test | ||
| public void testScanLimitOnBothButConfigWins() throws Exception { | ||
| testScanLimit(5, new Scan().setLimit(10), 5); | ||
| } | ||
|
Comment on lines
+355
to
+373
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To verify that the smaller value from the two sources is chosen. Maybe a bit overkill for such a simple logic? Considering each case takes quite a while to run. |
||
|
|
||
| @Test | ||
| public void testNoDuplicateResultsWhenSplitting() throws Exception { | ||
| TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting"); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do not set limit here? Strange, if this is the case, lots of our limit related UTs should fail?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The absence of it only matters when Protobuf serialization is involved. For example, in the existing test cases in
TestTableInputFormat, 1. no serialization was performed, and 2. noScan#setLimitcall was made, so the issue never surfaced. So I guess this particular case (Protobuf serialization of a Scan object with a non-default limit) wasn't previously covered by the unit tests.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, limit is in ScanRequest, not in Scan, Scan is just part of a ScanRequest.
So we should not change the code here.