[PLAT-11162] Validate disk space for target number of nodes on scale down

Summary:
Adds a free disk space check that runs whenever any node is removed. A node can be in one of three states:
1. ToBeAdded (not yet added) - metrics are not available. For on-prem nodes, a remote command is run to get the real value; for cloud nodes, it comes from the user intent.
2. ToBeRemoved - nodes to be removed. Metrics are available. For down nodes, metrics may not be present; a runtime config flag can be used to disable the check.
3. Live - metrics are available.

Subtasks in a group first collect the info for the ToBeAdded nodes.
For the ToBeRemoved and Live nodes, a single subtask then combines the already collected info with the data from metrics and runs the precheck (see the sketch below).
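
A rough Java sketch of the per-state decision described above (illustrative only; the class and enum names below are hypothetical and not from this change):

```java
// Minimal, self-contained sketch of how the disk info source is chosen per node state.
import java.util.EnumMap;
import java.util.Map;

public class DiskCheckSketch {
  enum NodeState { TO_BE_ADDED, TO_BE_REMOVED, LIVE }

  enum DiskInfoSource { REMOTE_DF_OR_USER_INTENT, PROMETHEUS_METRICS }

  static DiskInfoSource sourceFor(NodeState state) {
    // ToBeAdded nodes are not part of the universe yet, so no metrics exist for them:
    // on-prem nodes are probed with a remote df command, cloud nodes use the device
    // info from the user intent. ToBeRemoved and Live nodes are covered by metrics.
    return state == NodeState.TO_BE_ADDED
        ? DiskInfoSource.REMOTE_DF_OR_USER_INTENT
        : DiskInfoSource.PROMETHEUS_METRICS;
  }

  public static void main(String[] args) {
    Map<NodeState, DiskInfoSource> plan = new EnumMap<>(NodeState.class);
    for (NodeState state : NodeState.values()) {
      plan.put(state, sourceFor(state));
    }
    System.out.println(plan);
  }
}
```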

Test Plan:
Tested with both on-prem (2 AZs to 1 AZ, to cover both Add and Remove) and cloud universes.
Unit tests added.

Screenshot shows the error message on the UI.

{F151878}

On-prem df command
----------------------

```
2024-02-21T21:58:12.240Z  [debug] 2921d2cc-65b8-41a8-9708-2bf2ed30d55c ShellProcessHandler.java:203 [TaskPool-EditUniverse(bcabb364-4101-462a-94aa-b144f51b35ee)-0] com.yugabyte.yw.common.ShellProcessHandler Proc stdout for 'bin/py_wrapper bin/run_node_action.py --is_master --node_name yb-adm ...  df -Pm "/mnt/d0" 2>/dev/null | awk "FNR > 1 {print \$3, \$4, \$6}"' :
2024-02-21T21:58:12.241Z  [debug] 2921d2cc-65b8-41a8-9708-2bf2ed30d55c ShellProcessHandler.java:278 [TaskPool-EditUniverse(bcabb364-4101-462a-94aa-b144f51b35ee)-0] com.yugabyte.yw.common.ShellProcessHandler Command output:
2024-02-21T21:58:12.241Z  [debug] 2921d2cc-65b8-41a8-9708-2bf2ed30d55c ShellProcessHandler.java:278 [TaskPool-EditUniverse(bcabb364-4101-462a-94aa-b144f51b35ee)-0] com.yugabyte.yw.common.ShellProcessHandler 746 101605 /mnt/d0

```
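
With `df -Pm`, the POSIX output columns are filesystem, size, used, available, capacity, and mount point, so the awk filter above reduces each data line to `<used MB> <available MB> <mount point>` (here, `746 101605 /mnt/d0`). A minimal sketch of parsing such a line (hypothetical helper, not part of this commit):

```java
// Parses one trimmed df line of the form "<used MB> <available MB> <mount point>".
public class DfLineParser {
  record DiskUsage(long usedMb, long availableMb, String mountPoint) {}

  static DiskUsage parse(String line) {
    String[] parts = line.trim().split("\\s+", 3);
    return new DiskUsage(Long.parseLong(parts[0]), Long.parseLong(parts[1]), parts[2]);
  }

  public static void main(String[] args) {
    // Prints DiskUsage[usedMb=746, availableMb=101605, mountPoint=/mnt/d0]
    System.out.println(parse("746 101605 /mnt/d0"));
  }
}
```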

Reviewers: cwang, yshchetinin, amalyshev, sanketh, anijhawan, #yba-api-review

Reviewed By: amalyshev, sanketh, #yba-api-review

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D32277
nkhogen committed Mar 7, 2024
1 parent e1c3652 commit 66d2754
Showing 13 changed files with 490 additions and 47 deletions.
1 change: 1 addition & 0 deletions managed/RUNTIME-FLAGS.md
@@ -216,3 +216,4 @@
| "Leaderless tablets check enabled" | "yb.checks.leaderless_tablets.enabled" | "UNIVERSE" | " Whether to run CheckLeaderlessTablets subtask before running universe tasks" | "Boolean" |
| "Leaderless tablets check timeout" | "yb.checks.leaderless_tablets.timeout" | "UNIVERSE" | "Controls the max time out when performing the CheckLeaderlessTablets subtask" | "Duration" |
| "Enable YBC" | "ybc.universe.enabled" | "UNIVERSE" | "Enable YBC for universes during software upgrade" | "Boolean" |
| "Target Node Disk Usage Percentage" | "yb.checks.node_disk_size.target_usage_percentage" | "UNIVERSE" | "Percentage of current disk usage that may consume on the target nodes" | "Integer" |
@@ -6,7 +6,6 @@
import com.yugabyte.yw.commissioner.TaskExecutor;
import com.yugabyte.yw.commissioner.UpgradeTaskBase;
import com.yugabyte.yw.commissioner.UserTaskDetails.SubTaskGroupType;
import com.yugabyte.yw.commissioner.tasks.UniverseTaskBase.ServerType;
import com.yugabyte.yw.commissioner.tasks.subtasks.AnsibleConfigureServers;
import com.yugabyte.yw.commissioner.tasks.subtasks.ChangeMasterConfig;
import com.yugabyte.yw.common.PlacementInfoUtil;
@@ -70,6 +69,10 @@ protected void configureTaskParams(Universe universe) {
n -> {
n.masterState = MasterState.ToStop;
});
for (Cluster cluster : taskParams().clusters) {
createValidateDiskSizeOnNodeRemovalTasks(
universe, cluster, taskParams().getNodesInCluster(cluster.uuid));
}
createPreflightNodeCheckTasks(
taskParams().clusters,
PlacementInfoUtil.getNodesToProvision(taskParams().nodeDetailsSet),
@@ -30,6 +30,7 @@
import com.yugabyte.yw.commissioner.tasks.subtasks.UpdateNodeDetails;
import com.yugabyte.yw.commissioner.tasks.subtasks.UpdateUniverseCommunicationPorts;
import com.yugabyte.yw.commissioner.tasks.subtasks.UpdateUniverseIntent;
import com.yugabyte.yw.commissioner.tasks.subtasks.ValidateNodeDiskSize;
import com.yugabyte.yw.commissioner.tasks.subtasks.WaitForMasterLeader;
import com.yugabyte.yw.common.DnsManager;
import com.yugabyte.yw.common.NodeManager;
@@ -65,6 +66,7 @@
import com.yugabyte.yw.models.Universe.UniverseUpdater;
import com.yugabyte.yw.models.configs.CustomerConfig;
import com.yugabyte.yw.models.helpers.CloudSpecificInfo;
import com.yugabyte.yw.models.helpers.DeviceInfo;
import com.yugabyte.yw.models.helpers.NodeDetails;
import com.yugabyte.yw.models.helpers.NodeDetails.MasterState;
import com.yugabyte.yw.models.helpers.NodeDetails.NodeState;
@@ -2420,6 +2422,49 @@ public SubTaskGroup createInstanceExistsCheckTasks(
return subTaskGroup;
}

public void createValidateDiskSizeOnNodeRemovalTasks(
Universe universe, Cluster cluster, Set<NodeDetails> clusterNodes) {
if (config.getBoolean("yb.cloud.enabled")) {
// This is not enabled for cloud.
return;
}
int targetDiskUsagePercentage =
confGetter.getConfForScope(universe, UniverseConfKeys.targetNodeDiskUsagePercentage);
if (targetDiskUsagePercentage <= 0) {
log.info(
"Downsize disk size validation is disabled (usageMultiplierPercentage = {})",
targetDiskUsagePercentage);
return;
}
Set<NodeDetails> nodesToBeRemoved = PlacementInfoUtil.getNodesToBeRemoved(clusterNodes);
if (nodesToBeRemoved.isEmpty()) {
log.debug("No nodes are getting removed");
if (cluster.userIntent.providerType != CloudType.onprem) {
DeviceInfo taskDeviceInfo = cluster.userIntent.deviceInfo;
DeviceInfo existingDeviceInfo = universe.getCluster(cluster.uuid).userIntent.deviceInfo;
if (taskDeviceInfo == null
|| existingDeviceInfo == null
|| (Objects.equals(taskDeviceInfo.numVolumes, existingDeviceInfo.numVolumes)
&& Objects.equals(taskDeviceInfo.volumeSize, existingDeviceInfo.volumeSize))) {
log.debug("No change in the volume configuration");
return;
}
}
}
SubTaskGroup validateSubTaskGroup =
createSubTaskGroup(
ValidateNodeDiskSize.class.getSimpleName(), SubTaskGroupType.ValidateConfigurations);
ValidateNodeDiskSize.Params params =
Json.fromJson(Json.toJson(taskParams()), ValidateNodeDiskSize.Params.class);
params.clusterUuid = cluster.uuid;
params.nodePrefix = universe.getUniverseDetails().nodePrefix;
params.targetDiskUsagePercentage = targetDiskUsagePercentage;
ValidateNodeDiskSize task = createTask(ValidateNodeDiskSize.class);
task.initialize(params);
validateSubTaskGroup.addSubTask(task);
getRunnableTask().addSubTaskGroup(validateSubTaskGroup);
}

protected AnsibleConfigureServers getAnsibleConfigureServerTask(
NodeDetails node,
ServerType processType,
@@ -0,0 +1,224 @@
// Copyright (c) Yugabyte, Inc.

package com.yugabyte.yw.commissioner.tasks.subtasks;

import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import com.google.common.collect.Iterables;
import com.yugabyte.yw.commissioner.BaseTaskDependencies;
import com.yugabyte.yw.commissioner.Common.CloudType;
import com.yugabyte.yw.commissioner.tasks.UniverseDefinitionTaskBase;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.Cluster;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.UserIntent;
import com.yugabyte.yw.metrics.MetricQueryHelper;
import com.yugabyte.yw.metrics.MetricQueryResponse;
import com.yugabyte.yw.models.helpers.DeviceInfo;
import com.yugabyte.yw.models.helpers.NodeDetails;
import com.yugabyte.yw.models.helpers.NodeDetails.NodeState;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import javax.inject.Inject;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;

/** This performs best-effort disk space validation based on average disk sizes. */
@Slf4j
public class ValidateNodeDiskSize extends UniverseDefinitionTaskBase {
private static final String DB_DISK_USAGE_QUERY_FORMAT =
"avg(sum by (exported_instance)(log_wal_size{node_prefix=\"%s\"}) + sum by"
+ " (exported_instance)(rocksdb_current_version_sst_files_size{node_prefix=\"%s\"}))/"
+ " 1073741824";

// When this check is run, the ToBeAdded nodes are not yet added to the universe and the root
// mount points are unknown for on-prem nodes.
private static final String DISK_FREE_QUERY_FORMAT =
"sum by (exported_instance,"
+ " mountpoint)(node_filesystem_free_bytes{node_prefix=\"%s\"})/1073741824";

private final MetricQueryHelper metricQueryHelper;

@Inject
protected ValidateNodeDiskSize(
BaseTaskDependencies baseTaskDependencies, MetricQueryHelper metricQueryHelper) {
super(baseTaskDependencies);
this.metricQueryHelper = metricQueryHelper;
}

@JsonDeserialize(converter = Params.Converter.class)
public static class Params extends UniverseDefinitionTaskParams {
public UUID clusterUuid;
// Percentage of the current disk usage that may be consumed on the target nodes.
public int targetDiskUsagePercentage;

public static class Converter extends BaseConverter<Params> {}
}

@Override
protected Params taskParams() {
return (Params) taskParams;
}

private double fetchAvgDiskUsedSize() {
String query =
String.format(DB_DISK_USAGE_QUERY_FORMAT, taskParams().nodePrefix, taskParams().nodePrefix);
log.info("Running query: {}", query);
List<MetricQueryResponse.Entry> responseList = null;
try {
responseList = metricQueryHelper.queryDirect(query);
} catch (RuntimeException e) {
log.error("Failed to run metrics query {} - {}", query, e.getMessage());
}
if (CollectionUtils.isEmpty(responseList)) {
log.info("No metrics fetched for query: {}", query);
return 0.0;
}
ImmutablePair<Double, Double> pair = Iterables.getFirst(responseList.get(0).values, null);
if (pair == null) {
String errMsg = String.format("No response for query %s", query);
log.error(errMsg);
throw new RuntimeException(errMsg);
}
return pair.getRight();
}

private double fetchAvgDiskFreeSize(Map<String, Set<String>> nodeMountPoints) {
String query = String.format(DISK_FREE_QUERY_FORMAT, taskParams().nodePrefix);
log.info("Running query: {}", query);
List<MetricQueryResponse.Entry> responseList = null;
try {
responseList = metricQueryHelper.queryDirect(query);
} catch (RuntimeException e) {
log.error("Failed to run metrics query {} - {}", query, e.getMessage());
}
if (CollectionUtils.isEmpty(responseList)) {
log.info("No metrics fetched for query: {}", query);
return 0.0;
}
double total = 0.0;
int count = 0;
for (MetricQueryResponse.Entry entry : responseList) {
String nodeName = entry.labels.get("exported_instance");
String mountPoint = entry.labels.get("mountpoint");
Set<String> mountPoints = nodeMountPoints.get(nodeName);
if (CollectionUtils.isEmpty(mountPoints)
|| !mountPoints.stream()
.map(m -> Paths.get(m))
.anyMatch(p -> p.startsWith(Paths.get(mountPoint)))) {
log.info("Unmatched mount points {} for node {}", mountPoints, nodeName);
continue;
}
ImmutablePair<Double, Double> pair = Iterables.getFirst(entry.values, null);
if (pair == null) {
String errMsg = String.format("No response for query %s", query);
log.error(errMsg);
throw new RuntimeException(errMsg);
}
total += pair.getRight();
count++;
}
return count == 0 ? total : total / count;
}

private void validateNodeDiskSize(Cluster cluster) {
// Fetch the average disk usage per node.
double avgCurrentDiskUsage = fetchAvgDiskUsedSize();
if (avgCurrentDiskUsage == 0.0) {
log.info("Average disk usage is 0.00 GB. Skipping disk validation");
return;
}
Set<NodeDetails> clusterNodes = taskParams().getNodesInCluster(cluster.uuid);
int totalCurrentNodes =
(int) clusterNodes.stream().filter(n -> n.state != NodeState.ToBeAdded).count();
int totalTargetNodes =
(int) clusterNodes.stream().filter(n -> n.state != NodeState.ToBeRemoved).count();

double avgDiskFreeSize = 0.0;
double totalCurrentDiskUsage = avgCurrentDiskUsage * totalCurrentNodes;
double totalTargetDiskUsage = avgCurrentDiskUsage * totalTargetNodes;
double totalTargetDiskSizeNeeded =
(totalCurrentDiskUsage * taskParams().targetDiskUsagePercentage) / 100;
// Additional disk size needed to distribute the surplus.
double additionalDiskSizeNeeded = totalTargetDiskSizeNeeded - totalTargetDiskUsage;
if (cluster.userIntent.providerType == CloudType.onprem) {
// Fetch the average free disk size per node. ToBeAdded nodes are automatically excluded
// because they do not yet belong to the universe (this check runs before freezing).
Map<String, Set<String>> rootMounts =
getOnpremNodeMountPoints(cluster, n -> n.state != NodeState.ToBeAdded);
log.debug("Root mount points are {}", rootMounts);
avgDiskFreeSize = fetchAvgDiskFreeSize(rootMounts);
}
// If the volumes already have some non-db data, total disk size cannot be used to compare.
// It is better to compare the additional required disk size against the total free size.
double totalTargetDiskFreeSize = 0.0;
for (NodeDetails node : clusterNodes) {
if (node.state == NodeState.ToBeRemoved) {
continue;
}
if (node.state == NodeState.ToBeAdded) {
// For cloud, get the size from the config as this can change.
// For on-prem, average usage is added to the average free to arrive at the total estimate.
totalTargetDiskFreeSize +=
(cluster.userIntent.providerType == CloudType.onprem)
? (avgDiskFreeSize + avgCurrentDiskUsage)
: fetchDiskSizeLocally(cluster, node);
} else {
// Free size can become negative if the volume is being downsized.
totalTargetDiskFreeSize +=
(cluster.userIntent.providerType == CloudType.onprem)
? avgDiskFreeSize
: (fetchDiskSizeLocally(cluster, node) - avgCurrentDiskUsage);
}
}
String msg =
String.format(
"Total additional disk size: %,.2f GB, total available size: %,.2f GB",
additionalDiskSizeNeeded, totalTargetDiskFreeSize);
log.info(msg);
if (additionalDiskSizeNeeded > totalTargetDiskFreeSize) {
String errMsg =
String.format(
"Additional disk size of %,.2f GB is needed, but only %,.2f GB is available",
additionalDiskSizeNeeded, Math.max(0.0, totalTargetDiskFreeSize));
throw new RuntimeException(errMsg);
}
}

private double fetchDiskSizeLocally(Cluster cluster, NodeDetails node) {
DeviceInfo deviceInfo = cluster.userIntent.getDeviceInfoForNode(node);
return deviceInfo.volumeSize == null ? -1.0 : deviceInfo.volumeSize;
}

private Map<String, Set<String>> getOnpremNodeMountPoints(
Cluster cluster, Predicate<NodeDetails> filter) {
final Map<String, Set<String>> nodeMountPoints = new HashMap<>();
taskParams().getNodesInCluster(cluster.uuid).stream()
.filter(n -> filter.test(n))
.forEach(
n -> {
UserIntent userIntent = taskParams().getClusterByUuid(n.placementUuid).userIntent;
DeviceInfo deviceInfo = userIntent.getDeviceInfoForNode(n);
if (deviceInfo != null && StringUtils.isNotEmpty(deviceInfo.mountPoints)) {
nodeMountPoints.put(
n.getNodeName(),
Arrays.stream(deviceInfo.mountPoints.split(",")).collect(Collectors.toSet()));
} else {
log.warn("Device info is missing for node {}", n.getNodeName());
}
});
return nodeMountPoints;
}

@Override
public void run() {
validateNodeDiskSize(taskParams().getClusterByUuid(taskParams().clusterUuid));
}
}
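
As a worked illustration of the math in `validateNodeDiskSize` (hypothetical numbers): shrinking a cluster from 3 nodes to 2 with an average usage of 200 GB per node and the default 100% target percentage gives a current total usage of 600 GB and a projected usage of 400 GB on the surviving nodes, so 200 GB of additional data must be absorbed; the task fails unless the combined free space computed for the two remaining nodes is at least 200 GB.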
@@ -1046,4 +1046,12 @@ public class UniverseConfKeys extends RuntimeConfigKeysModule {
"Enable YBC for universes during software upgrade",
ConfDataType.BooleanType,
ImmutableList.of(ConfKeyTags.PUBLIC));
public static final ConfKeyInfo<Integer> targetNodeDiskUsagePercentage =
new ConfKeyInfo<>(
"yb.checks.node_disk_size.target_usage_percentage",
ScopeType.UNIVERSE,
"Target Node Disk Usage Percentage",
"Percentage of current disk usage that may consume on the target nodes",
ConfDataType.IntegerType,
ImmutableList.of(ConfKeyTags.PUBLIC));
}
@@ -929,7 +929,9 @@ public enum TaskType {

CheckForClusterServers(CheckClusterConsistency.class),

CheckLeaderlessTablets(CheckLeaderlessTablets.class);
CheckLeaderlessTablets(CheckLeaderlessTablets.class),

ValidateNodeDiskSize(com.yugabyte.yw.commissioner.tasks.subtasks.ValidateNodeDiskSize.class);

private final Class<? extends ITask> taskClass;

3 changes: 3 additions & 0 deletions managed/src/main/resources/reference.conf
@@ -723,6 +723,9 @@ yb {
timeout = 5m
enabled = true
}
node_disk_size {
target_usage_percentage = 100
}
}

health {