@@ -268,21 +268,23 @@ public void testSelfTuneRowBatchCalculation() throws Exception {
268
268
// Force a larger initial batchSize that can be tuned down
269
269
orcWriter .batchSize = 10 ;
270
270
orcWriter .rowBatch .ensureSize (10 );
271
+ // Given that the available memory is very high, the resulting batchsize should be maxed out
271
272
orcWriter .availableMemory = 100000000 ;
272
- // Given the amount of available memory and a low stripe size, and estimated rowBatchSize, the resulting batchsize should be maxed out
273
+ // Consider that the batch size incrementally increases based on the difference between target and current batchsize (10)
273
274
orcWriter .tuneBatchSize (10 );
274
- System .out .println (orcWriter .batchSize );
275
- // Take into account that increases in batchsize are multiplied by a factor to prevent large jumps in batchsize
276
- Assert .assertTrue (orcWriter .batchSize == (GobblinOrcWriterConfigs .DEFAULT_ORC_WRITER_BATCH_SIZE +10 )/2 );
275
+ Assert .assertEquals (orcWriter .batchSize , 505 );
276
+ orcWriter .tuneBatchSize (10 );
277
+ Assert .assertEquals (orcWriter .batchSize , 752 );
278
+
277
279
orcWriter .availableMemory = 100 ;
278
280
orcWriter .tuneBatchSize (10 );
279
281
// Given that the amount of available memory is low, the resulting batchsize should be 1
280
- Assert .assertTrue (orcWriter .batchSize == 1 );
282
+ Assert .assertEquals (orcWriter .batchSize , 1 );
281
283
orcWriter .availableMemory = 10000 ;
282
284
orcWriter .rowBatch .ensureSize (10000 );
283
285
// Since the rowBatch is large, the resulting batchsize should still be 1 even with more memory
284
286
orcWriter .tuneBatchSize (10 );
285
- Assert .assertTrue (orcWriter .batchSize == 1 );
287
+ Assert .assertEquals (orcWriter .batchSize , 1 );
286
288
}
287
289
288
290
@ Test
@@ -322,4 +324,42 @@ public void testStatePersistenceWhenClosingWriter() throws IOException {
322
324
Assert .assertNotNull (dummyState .getProp (GobblinOrcWriterConfigs .RuntimeStateConfigs .ORC_WRITER_NATIVE_WRITER_MEMORY ));
323
325
Assert .assertNotNull (OrcConf .ROWS_BETWEEN_CHECKS .getAttribute ());
324
326
}
325
- }
327
+
328
+ @ Test
329
+ public void testSelfTuneRowBatchCalculationWithStripeMax () throws Exception {
330
+ Schema schema =
331
+ new Schema .Parser ().parse (this .getClass ().getClassLoader ().getResourceAsStream ("orc_writer_test/schema.avsc" ));
332
+ List <GenericRecord > recordList = deserializeAvroRecords (this .getClass (), schema , "orc_writer_test/data_multi.json" );
333
+
334
+ // Mock WriterBuilder, bunch of mocking behaviors to work-around precondition checks in writer builder
335
+ FsDataWriterBuilder <Schema , GenericRecord > mockBuilder =
336
+ (FsDataWriterBuilder <Schema , GenericRecord >) Mockito .mock (FsDataWriterBuilder .class );
337
+ when (mockBuilder .getSchema ()).thenReturn (schema );
338
+
339
+ State dummyState = new WorkUnit ();
340
+ String stagingDir = Files .createTempDir ().getAbsolutePath ();
341
+ String outputDir = Files .createTempDir ().getAbsolutePath ();
342
+ dummyState .setProp (ConfigurationKeys .WRITER_STAGING_DIR , stagingDir );
343
+ dummyState .setProp (ConfigurationKeys .WRITER_FILE_PATH , "selfTune" );
344
+ dummyState .setProp (ConfigurationKeys .WRITER_OUTPUT_DIR , outputDir );
345
+ dummyState .setProp (GobblinOrcWriterConfigs .ORC_WRITER_AUTO_SELFTUNE_ENABLED , "true" );
346
+ dummyState .setProp (OrcConf .STRIPE_SIZE .getAttribute (), "100" );
347
+ dummyState .setProp (GobblinOrcWriterConfigs .ORC_WRITER_ENABLE_BUFFER_LIMIT_ORC_STRIPE , "true" );
348
+ when (mockBuilder .getFileName (dummyState )).thenReturn ("file" );
349
+
350
+ // Having a closer to manage the life-cycle of the writer object.
351
+ Closer closer = Closer .create ();
352
+ GobblinOrcWriter orcWriter = closer .register (new GobblinOrcWriter (mockBuilder , dummyState ));
353
+ // Force a larger initial batchSize that can be tuned down
354
+ orcWriter .batchSize = 10 ;
355
+ orcWriter .rowBatch .ensureSize (10 );
356
+ orcWriter .availableMemory = 100000000 ;
357
+ // Since the stripe size is 100, the resulting batchsize should be 10 (100/10)
358
+ orcWriter .tuneBatchSize (10 );
359
+ Assert .assertEquals (orcWriter .batchSize ,10 );
360
+
361
+ // Increasing the estimated record size should decrease the max batch size
362
+ orcWriter .tuneBatchSize (100 );
363
+ Assert .assertEquals (orcWriter .batchSize ,1 );
364
+ }
365
+ }
0 commit comments