Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion db/compaction/compaction_outputs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,20 @@ bool CompactionOutputs::ShouldStopBefore(const CompactionIterator& c_iter) {

// reach the max file size
uint64_t estimated_file_size = current_output_file_size_;
if (compaction_->mutable_cf_options().target_file_size_is_upper_bound) {

// When target_file_size_is_upper_bound is enabled, add tail size estimation
// to enforce target file size as a hard upper bound. Only add tail size when
// there is:
// 1. At least one key to prevent creating empty files when tail overhead
// alone would exceed max_output_file_size.
// 2. At least one data block written to prevent premature file cutting when
// tail overhead estimation would trigger cutting before meaningful data
// is written (prevents files with only metadata/tail blocks).
if (compaction_->mutable_cf_options().target_file_size_is_upper_bound &&
builder_->NumEntries() > 0 && builder_->NumDataBlocks() > 0) {
estimated_file_size += builder_->EstimatedTailSize();
}

if (estimated_file_size >= compaction_->max_output_file_size()) {
return true;
}
Expand Down
3 changes: 3 additions & 0 deletions db/db_block_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,9 @@ TEST_F(DBBlockCacheTest, CacheCompressionDict) {
options.num_levels = 2;
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout for testing.
options.target_file_size_is_upper_bound = false;
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
table_options.block_cache.reset(new MockCache());
Expand Down
6 changes: 6 additions & 0 deletions db/db_iterator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4288,6 +4288,9 @@ TEST_P(DBMultiScanIteratorTest, RangeAcrossFiles) {
options.compaction_style = kCompactionStyleUniversal;
options.num_levels = 50;
options.compression = kNoCompression;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout for testing.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);

auto rnd = Random::GetTLSInstance();
Expand Down Expand Up @@ -4458,6 +4461,9 @@ TEST_P(DBMultiScanIteratorTest, RangeBetweenFiles) {
options.compaction_style = kCompactionStyleUniversal;
options.num_levels = 50;
options.compression = kNoCompression;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout for testing.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);

auto rnd = Random::GetTLSInstance();
Expand Down
23 changes: 23 additions & 0 deletions db/db_range_del_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
options.memtable_factory.reset(test::NewSpecialSkipListFactory(kNumPerFile));
options.num_levels = 2;
options.target_file_size_base = kFileBytes;
// Disable target_file_size_is_upper_bound to test the exact edge case without
// interference from tail size estimation. The target_file_size_is_upper_bound
// feature prevents this edge case from occurring by cutting files before they
// reach the target size when accounting for the tail size.
options.target_file_size_is_upper_bound = false;
BlockBasedTableOptions table_options;
table_options.block_size_deviation = 50; // each block holds two keys
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
Expand Down Expand Up @@ -1803,6 +1808,9 @@ TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenPointKeyAndTombstone) {
options.disable_auto_compactions = true;
options.target_file_size_base = 9 * 1024;
options.max_compaction_bytes = 9 * 1024;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout for testing.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);
Random rnd(301);
for (int i = 0; i < kNumFiles; ++i) {
Expand Down Expand Up @@ -1840,6 +1848,9 @@ TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenTombstone) {
options.disable_auto_compactions = true;
options.target_file_size_base = 9 * 1024;
options.max_compaction_bytes = 9 * 1024;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout needed for testing.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);
Random rnd(301);
for (int i = 0; i < kNumFiles; ++i) {
Expand Down Expand Up @@ -1879,6 +1890,9 @@ TEST_F(DBRangeDelTest, OversizeCompactionPointKeyWithinRangetombstone) {
options.disable_auto_compactions = true;
options.target_file_size_base = 9 * 1024;
options.max_compaction_bytes = 9 * 1024;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout for testing max_compaction_bytes behavior.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);
Random rnd(301);
for (int i = 0; i < 9; ++i) {
Expand Down Expand Up @@ -1907,6 +1921,9 @@ TEST_F(DBRangeDelTest, OverlappedTombstones) {
options.disable_auto_compactions = true;
options.target_file_size_base = 9 * 1024;
options.max_compaction_bytes = 9 * 1024;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout for testing.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);
Random rnd(301);
for (int i = 0; i < kNumFiles; ++i) {
Expand Down Expand Up @@ -1947,6 +1964,9 @@ TEST_F(DBRangeDelTest, OverlappedKeys) {
options.disable_auto_compactions = true;
options.target_file_size_base = 9 * 1024;
options.max_compaction_bytes = 9 * 1024;
// Disable target_file_size_is_upper_bound to maintain the expected file
// layout needed for testing.
options.target_file_size_is_upper_bound = false;
DestroyAndReopen(options);
Random rnd(301);
for (int i = 0; i < kNumFiles; ++i) {
Expand Down Expand Up @@ -2804,6 +2824,9 @@ TEST_F(DBRangeDelTest, LeftSentinelKeyTestWithNewerKey) {
options.disable_auto_compactions = true;
options.target_file_size_base = 3 * 1024;
options.max_compaction_bytes = 3 * 1024;
// Disable target_file_size_is_upper_bound to maintain the specific L1 file
// layout required for testing.
options.target_file_size_is_upper_bound = false;

DestroyAndReopen(options);
// L2
Expand Down
41 changes: 38 additions & 3 deletions db/db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5671,6 +5671,7 @@ TEST_F(DBTest, DynamicCompactionOptions) {
options.target_file_size_multiplier = 1;
options.max_bytes_for_level_base = k128KB;
options.max_bytes_for_level_multiplier = 4;
options.target_file_size_is_upper_bound = false;

// Block flush thread and disable compaction thread
env_->SetBackgroundThreads(1, Env::LOW);
Expand All @@ -5687,7 +5688,8 @@ TEST_F(DBTest, DynamicCompactionOptions) {

// Write 3 files that have the same key range.
// Since level0_file_num_compaction_trigger is 3, compaction should be
// triggered. The compaction should result in one L1 file
// triggered. With target_file_size_is_upper_bound disabled, the compaction
// can result in 1 L1 file that exceeds target_file_size_base.
gen_l0_kb(0, 64, 1);
ASSERT_EQ(NumTableFilesAtLevel(0), 1);
gen_l0_kb(0, 64, 1);
Expand All @@ -5701,9 +5703,42 @@ TEST_F(DBTest, DynamicCompactionOptions) {
ASSERT_LE(metadata[0].size, k64KB + k4KB);
ASSERT_GE(metadata[0].size, k64KB - k4KB);

// Test dynamically enabling target_file_size_is_upper_bound
// Write 3 more files with the same key range. With
// target_file_size_is_upper_bound enabled, compaction should result in 2 L1
// files to avoid exceeding target_file_size_base.
ASSERT_OK(
dbfull()->SetOptions({{"target_file_size_is_upper_bound", "true"}}));

gen_l0_kb(0, 64, 1);
gen_l0_kb(0, 64, 1);
gen_l0_kb(0, 64, 1);
ASSERT_OK(dbfull()->TEST_WaitForCompact());
ASSERT_EQ("0,2", FilesPerLevel());
metadata.clear();
db_->GetLiveFilesMetaData(&metadata);
ASSERT_GE(metadata.size(), 2U);
ASSERT_LE(metadata[0].size, k64KB);
ASSERT_LE(metadata[1].size, k32KB);
ASSERT_GE(metadata[1].size, k4KB);

// Test dynamically disabling target_file_size_is_upper_bound. The
// compaction merge should result in 1 L1 file that exceeds
// target_file_size_base.
ASSERT_OK(
dbfull()->SetOptions({{"target_file_size_is_upper_bound", "false"}}));

ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_EQ("0,1", FilesPerLevel());
metadata.clear();
db_->GetLiveFilesMetaData(&metadata);
ASSERT_GE(metadata.size(), 1U);
ASSERT_LE(metadata[0].size, k64KB + k4KB);
ASSERT_GE(metadata[0].size, k64KB - k4KB);

// Test compaction trigger and target_file_size_base
// Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
// Writing to 64KB L0 files should trigger a compaction. Since these
// Writing two 64KB L0 files should trigger a compaction. Since these
// 2 L0 files have the same key range, compaction merges them and should
// result in 2 32KB L1 files.
ASSERT_OK(
Expand Down Expand Up @@ -6631,7 +6666,7 @@ TEST_F(DBTest, SuggestCompactRangeUniversal) {
GenerateNewRandomFile(&rnd);
}

ASSERT_EQ("1,2,3,4", FilesPerLevel());
ASSERT_EQ("1,3,4,5", FilesPerLevel());
for (int i = 0; i < 3; i++) {
ASSERT_OK(
db_->SuggestCompactRange(db_->DefaultColumnFamily(), nullptr, nullptr));
Expand Down
3 changes: 3 additions & 0 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,9 @@ TEST_F(DBTest2, MaxCompactionBytesTest) {
options.target_file_size_base = 100 << 10;
// Infinite for full compaction.
options.max_compaction_bytes = options.target_file_size_base * 100;
// Disable target_file_size_is_upper_bound to maintain predictable file sizes
// for testing.
options.target_file_size_is_upper_bound = false;

Reopen(options);

Expand Down
4 changes: 2 additions & 2 deletions include/rocksdb/advanced_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,10 +534,10 @@ struct AdvancedColumnFamilyOptions {
// large tail blocks. When disabled, only the data block size is considered,
// which may result in SST files exceeding the target_file_size_base.
//
// Default: false
// Default: true
//
// Dynamically changeable through SetOptions() API
bool target_file_size_is_upper_bound = false;
bool target_file_size_is_upper_bound = true;

// If true, RocksDB will pick target size of each level dynamically.
// We will pick a base level b >= 1. L0 will be directly merged into level b,
Expand Down
48 changes: 48 additions & 0 deletions java/rocksjni/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2800,6 +2800,30 @@ void Java_org_rocksdb_Options_setTargetFileSizeMultiplier(
static_cast<int>(jtarget_file_size_multiplier);
}

/*
* Class: org_rocksdb_Options
* Method: setTargetFileSizeIsUpperBound
* Signature: (JZ)V
*/
void Java_org_rocksdb_Options_setTargetFileSizeIsUpperBound(
    JNIEnv*, jclass, jlong jhandle, jboolean jtarget_file_size_is_upper_bound) {
  // jhandle is reinterpreted as a pointer to the native Options object.
  auto* options = reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(jhandle);
  // Convert the JNI boolean to a C++ bool before storing it.
  const bool is_upper_bound =
      static_cast<bool>(jtarget_file_size_is_upper_bound);
  options->target_file_size_is_upper_bound = is_upper_bound;
}

/*
* Class: org_rocksdb_Options
* Method: targetFileSizeIsUpperBound
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_Options_targetFileSizeIsUpperBound(JNIEnv*, jclass,
                                                             jlong jhandle) {
  // jhandle is reinterpreted as a pointer to the native Options object.
  const auto* options =
      reinterpret_cast<ROCKSDB_NAMESPACE::Options*>(jhandle);
  const bool is_upper_bound = options->target_file_size_is_upper_bound;
  // Hand the flag back to Java as a JNI boolean.
  return static_cast<jboolean>(is_upper_bound);
}

/*
* Class: org_rocksdb_Options
* Method: maxBytesForLevelBase
Expand Down Expand Up @@ -4753,6 +4777,30 @@ void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeMultiplier(
static_cast<int>(jtarget_file_size_multiplier);
}

/*
* Class: org_rocksdb_ColumnFamilyOptions
* Method: setTargetFileSizeIsUpperBound
* Signature: (JZ)V
*/
void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeIsUpperBound(
    JNIEnv*, jclass, jlong jhandle, jboolean jtarget_file_size_is_upper_bound) {
  // jhandle is reinterpreted as a pointer to the native ColumnFamilyOptions.
  auto* cf_options =
      reinterpret_cast<ROCKSDB_NAMESPACE::ColumnFamilyOptions*>(jhandle);
  // Convert the JNI boolean to a C++ bool before storing it.
  const bool is_upper_bound =
      static_cast<bool>(jtarget_file_size_is_upper_bound);
  cf_options->target_file_size_is_upper_bound = is_upper_bound;
}

/*
* Class: org_rocksdb_ColumnFamilyOptions
* Method: targetFileSizeIsUpperBound
* Signature: (J)Z
*/
jboolean Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeIsUpperBound(
    JNIEnv*, jclass, jlong jhandle) {
  // jhandle is reinterpreted as a pointer to the native ColumnFamilyOptions.
  const auto* cf_options =
      reinterpret_cast<ROCKSDB_NAMESPACE::ColumnFamilyOptions*>(jhandle);
  const bool is_upper_bound = cf_options->target_file_size_is_upper_bound;
  // Hand the flag back to Java as a JNI boolean.
  return static_cast<jboolean>(is_upper_bound);
}

/*
* Class: org_rocksdb_ColumnFamilyOptions
* Method: maxBytesForLevelBase
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,29 @@ T setTargetFileSizeMultiplier(
*/
int targetFileSizeMultiplier();

/**
 * If true, RocksDB enforces stricter file size limits when deciding whether
 * to cut a compaction output file, which prevents files from exceeding
 * target_file_size_base. When false, SST files might exceed the
 * target_file_size_base.
 *
 * Default: true
 *
 * Dynamically changeable through SetOptions() API
 *
 * @param targetFileSizeIsUpperBound whether to treat target file size as
 *     an upper bound
 * @return the reference to the current options.
 */
T setTargetFileSizeIsUpperBound(boolean targetFileSizeIsUpperBound);

/**
 * If true, RocksDB enforces stricter file size limits when deciding whether
 * to cut a compaction output file. Otherwise, files might exceed the
 * target_file_size_base.
 *
 * Default: true
 *
 * @return whether target file size is treated as an upper bound
 */
boolean targetFileSizeIsUpperBound();

/**
* The ratio between the total size of level-(L+1) files and the total
* size of level-L files for all L.
Expand Down
15 changes: 15 additions & 0 deletions java/src/main/java/org/rocksdb/ColumnFamilyOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,18 @@ public int targetFileSizeMultiplier() {
return targetFileSizeMultiplier(nativeHandle_);
}

@Override
public ColumnFamilyOptions setTargetFileSizeIsUpperBound(
    final boolean targetFileSizeIsUpperBound) {
  // Forward the flag to the native setter using this object's handle.
  final long handle = nativeHandle_;
  setTargetFileSizeIsUpperBound(handle, targetFileSizeIsUpperBound);
  // Return this instance so calls can be chained.
  return this;
}

@Override
public boolean targetFileSizeIsUpperBound() {
  // Read the current value through the native getter.
  final boolean isUpperBound = targetFileSizeIsUpperBound(nativeHandle_);
  return isUpperBound;
}

@Override
public ColumnFamilyOptions setMaxBytesForLevelBase(
final long maxBytesForLevelBase) {
Expand Down Expand Up @@ -1382,6 +1394,9 @@ private static native void setBottommostCompressionOptions(
private static native long targetFileSizeBase(long handle);
private static native void setTargetFileSizeMultiplier(long handle, int multiplier);
private static native int targetFileSizeMultiplier(long handle);
private static native void setTargetFileSizeIsUpperBound(
long handle, boolean targetFileSizeIsUpperBound);
private static native boolean targetFileSizeIsUpperBound(long handle);
private static native void setMaxBytesForLevelBase(long handle, long maxBytesForLevelBase);
private static native long maxBytesForLevelBase(long handle);
private static native void setLevelCompactionDynamicLevelBytes(
Expand Down
13 changes: 13 additions & 0 deletions java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public enum CompactionOption implements MutableColumnFamilyOptionKey {
max_compaction_bytes(ValueType.LONG),
target_file_size_base(ValueType.LONG),
target_file_size_multiplier(ValueType.INT),
target_file_size_is_upper_bound(ValueType.BOOLEAN),
max_bytes_for_level_base(ValueType.LONG),
max_bytes_for_level_multiplier(ValueType.INT),
max_bytes_for_level_multiplier_additional(ValueType.INT_ARRAY),
Expand Down Expand Up @@ -406,6 +407,18 @@ public int targetFileSizeMultiplier() {
return getInt(CompactionOption.target_file_size_multiplier);
}

@Override
public MutableColumnFamilyOptionsBuilder setTargetFileSizeIsUpperBound(
    final boolean targetFileSizeIsUpperBound) {
  // Store the flag under its compaction option key.
  final CompactionOption optionKey = CompactionOption.target_file_size_is_upper_bound;
  return setBoolean(optionKey, targetFileSizeIsUpperBound);
}

@Override
public boolean targetFileSizeIsUpperBound() {
  // Look the flag up by its compaction option key.
  final CompactionOption optionKey = CompactionOption.target_file_size_is_upper_bound;
  return getBoolean(optionKey);
}

@Override
public MutableColumnFamilyOptionsBuilder setMaxBytesForLevelBase(
final long maxBytesForLevelBase) {
Expand Down
14 changes: 14 additions & 0 deletions java/src/main/java/org/rocksdb/Options.java
Original file line number Diff line number Diff line change
Expand Up @@ -1486,6 +1486,17 @@ public Options setTargetFileSizeMultiplier(final int multiplier) {
return this;
}

@Override
public boolean targetFileSizeIsUpperBound() {
  // Read the current value through the native getter.
  final boolean isUpperBound = targetFileSizeIsUpperBound(nativeHandle_);
  return isUpperBound;
}

@Override
public Options setTargetFileSizeIsUpperBound(final boolean targetFileSizeIsUpperBound) {
  // Forward the flag to the native setter using this object's handle.
  final long handle = nativeHandle_;
  setTargetFileSizeIsUpperBound(handle, targetFileSizeIsUpperBound);
  // Return this instance so calls can be chained.
  return this;
}

@Override
public Options setMaxBytesForLevelBase(final long maxBytesForLevelBase) {
setMaxBytesForLevelBase(nativeHandle_, maxBytesForLevelBase);
Expand Down Expand Up @@ -2360,6 +2371,9 @@ private static native void setBottommostCompressionOptions(
private static native long targetFileSizeBase(long handle);
private static native void setTargetFileSizeMultiplier(long handle, int multiplier);
private static native int targetFileSizeMultiplier(long handle);
private static native void setTargetFileSizeIsUpperBound(
long handle, boolean targetFileSizeIsUpperBound);
private static native boolean targetFileSizeIsUpperBound(long handle);
private static native void setMaxBytesForLevelBase(long handle, long maxBytesForLevelBase);
private static native long maxBytesForLevelBase(long handle);
private static native void setLevelCompactionDynamicLevelBytes(
Expand Down
Loading
Loading