-
Notifications
You must be signed in to change notification settings - Fork 9
Tracking improvement of estimated space necessary for inflight compactions #648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: palantir-cassandra-2.2.18
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,7 @@ | |
import com.google.common.collect.ImmutableSet; | ||
import com.google.common.collect.ImmutableSet.Builder; | ||
import com.google.common.collect.Iterables; | ||
import org.apache.cassandra.metrics.CompactionMetrics; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
@@ -122,8 +123,8 @@ public class Directories | |
|
||
public static final DataDirectory[] dataDirectories; | ||
|
||
//needed for dealing with race condition when compactions run in parallel, to reflect the actual available space | ||
//see https://github.com/palantir/cassandra/issues/198 | ||
// needed for dealing with race condition when compactions run in parallel, to reflect the actual available space | ||
// see https://github.com/palantir/cassandra/issues/198 | ||
private static final Object COMPACTION_LOCK = new Object(); | ||
private static long expectedSpaceUsedByCompactions = 0; | ||
static | ||
|
@@ -496,37 +497,58 @@ static void sortWriteableCandidates(List<DataDirectoryCandidate> candidates, lon | |
|
||
public Boolean checkAvailableDiskSpaceWithoutConsideringConcurrentCompactions(long estimatedSSTables, long expectedTotalWriteSize) | ||
{ | ||
return checkAvailableDiskSpace(estimatedSSTables, expectedTotalWriteSize, 0); | ||
return checkAvailableDiskSpace(estimatedSSTables, expectedTotalWriteSize, 0, 0); | ||
} | ||
|
||
public Boolean checkAvailableDiskSpaceConsideringConcurrentCompactions(long estimatedSSTables, long expectedTotalWriteSize) | ||
{ | ||
synchronized (COMPACTION_LOCK) | ||
{ | ||
if (!checkAvailableDiskSpace(estimatedSSTables, expectedTotalWriteSize, expectedSpaceUsedByCompactions)) | ||
if (!checkAvailableDiskSpace(estimatedSSTables, | ||
expectedTotalWriteSize, | ||
expectedSpaceUsedByCompactions, | ||
CompactionMetrics.getCompactions() | ||
.stream() | ||
.mapToLong(compactionHolder -> compactionHolder.getCompactionInfo().getCompleted()) | ||
.sum())) | ||
return false; | ||
expectedSpaceUsedByCompactions += expectedTotalWriteSize; | ||
return true; | ||
} | ||
} | ||
|
||
private boolean checkAvailableDiskSpace(long estimatedSSTables, long expectedTotalWriteSize, long expectedSpaceUsedByCompactions) { | ||
/** | ||
* Determines if there is sufficient disk space available to perform a compaction. | ||
* | ||
* @param estimatedSSTables The estimated number of SSTables expected to be generated as a result of the compaction. | ||
* @param expectedTotalWriteSize The total estimated disk space required for all ongoing and pending compactions, in bytes. | ||
* @param expectedSpaceUsedByCompactions The estimated disk space, in bytes, needed specifically for the current compaction. | ||
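There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you flipped this description and the one before it: the descriptions of `expectedTotalWriteSize` and `expectedSpaceUsedByCompactions` appear to be swapped. |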
||
* @param liveSpaceUsedByInProgressCompactions The disk space, in bytes, currently used by temporary SSTables. | ||
* @return boolean indicating whether there is enough disk space available to proceed with the compaction. | ||
*/ | ||
private boolean checkAvailableDiskSpace(long estimatedSSTables, | ||
long expectedTotalWriteSize, | ||
long expectedSpaceUsedByCompactions, | ||
long liveSpaceUsedByInProgressCompactions) | ||
{ | ||
long writeSize = expectedTotalWriteSize / estimatedSSTables; | ||
long totalAvailable = 0L; | ||
long totalSpace = 0L; | ||
long spaceNeededForInProgressCompactions = Math.max(liveSpaceUsedByInProgressCompactions, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. confused why this is a max and not just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because the estimation for how big the compaction will be is an estimation. In most cases space used by tmp tables takes up more space than the initial estimation, with this setup we will never end up with a negative number meaning we are underestimating the needed space. |
||
expectedSpaceUsedByCompactions - liveSpaceUsedByInProgressCompactions); | ||
|
||
for (DataDirectory dataDir : dataDirectories) | ||
{ | ||
if (DisallowedDirectories.isUnwritable(getLocationForDisk(dataDir))) | ||
continue; | ||
DataDirectoryCandidate candidate = new DataDirectoryCandidate(dataDir); | ||
// exclude directory if its total writeSize does not fit to data directory | ||
if (insufficientDiskSpaceForWriteSize(candidate.availableSpace - expectedSpaceUsedByCompactions, candidate.totalSpace, writeSize)) | ||
if (insufficientDiskSpaceForWriteSize(candidate.availableSpace - spaceNeededForInProgressCompactions, candidate.totalSpace, writeSize)) | ||
continue; | ||
totalAvailable += candidate.availableSpace; | ||
totalSpace += candidate.totalSpace; | ||
} | ||
if (insufficientDiskSpaceForWriteSize(totalAvailable - expectedSpaceUsedByCompactions, totalSpace, expectedTotalWriteSize)) | ||
if (insufficientDiskSpaceForWriteSize(totalAvailable - spaceNeededForInProgressCompactions, totalSpace, expectedTotalWriteSize)) | ||
{ | ||
logger.warn("Insufficient space for compaction - total available space found: {}MB for compaction with" | ||
+ " expected size {}MB, with total disk space {}MB and max disk usage by compaction at {}%", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree with the overall idea. Just two concerns from me:
`compactionHolder.getCompactionInfo().getCompleted()`? I don't think we emit this as a metric anywhere right now. Can we sanity check either via logging or (preferably) a dashboard and soak for a couple of days to verify it looks reasonable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
1 - Sounds like we want to add an abstraction for the sake of abstracting, which is fine. I was going for the simplest solution, but if we want to add complexity to ensure isolation, that makes sense. Building a singleton to track the compactions and then using it to update metrics should be straightforward.
2 - From testing observations, it is much more accurate than the estimate used to reserve space. Validating this by observing actual telemetry sounds perfectly valid; I'll set something up.