Skip to content

Commit

Permalink
[PLAT-14893] Add a new node action decommission node
Browse files Browse the repository at this point in the history
Summary:
Added a new node action Decommission, which is the equivalent of Remove + Release + Delete node actions.

The restrictions on Decommission are the same as for the current Remove/Release actions:

1. Either other nodes must exist in this AZ, or no tablets may be assigned to the node being decommissioned.
2. A final check is done to verify no tablets are assigned to this node before releasing the VM / deleting onprem data.
3. Cannot go below floor(RF/2) masters as a result of this operation.

This operation can currently delete an entire AZ, just like the existing remove -> release -> delete sequence. This will be addressed separately, pending discussion.

Test Plan:
1. Local provider test for removing 1 node from 3 node, RF1
2. Local provider test for removing 1 node w/ master from (2,1,1) RF3 cluster
3. Local provider test for removing 1 node from RR cluster
4. Tested on 4 / 6 node RF3 AWS/GCP universe on nodes with and without master
5. Verified that the action does not show up in UI and the API call fails for RF3 3 node universe

Reviewers: cwang, nsingh, yshchetinin

Reviewed By: cwang

Subscribers: yugaware

Differential Revision: https://phorge.dev.yugabyte.com/D37885
  • Loading branch information
iSignal committed Dec 11, 2024
1 parent 96c4579 commit 8506386
Show file tree
Hide file tree
Showing 27 changed files with 641 additions and 309 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/*
* Copyright 2024 YugaByte, Inc. and Contributors
*
* Licensed under the Polyform Free Trial License 1.0.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* https://github.com/YugaByte/yugabyte-db/blob/master/licenses/POLYFORM-FREE-TRIAL-LICENSE-1.0.0.txt
*/

package com.yugabyte.yw.commissioner.tasks;

import com.yugabyte.yw.commissioner.BaseTaskDependencies;
import com.yugabyte.yw.commissioner.ITask.Retryable;
import com.yugabyte.yw.commissioner.UserTaskDetails.SubTaskGroupType;
import com.yugabyte.yw.commissioner.tasks.params.NodeTaskParams;
import com.yugabyte.yw.common.NodeActionType;
import com.yugabyte.yw.common.config.UniverseConfKeys;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.Cluster;
import com.yugabyte.yw.models.Universe;
import com.yugabyte.yw.models.helpers.NodeDetails;
import java.util.Set;
import javax.inject.Inject;
import lombok.extern.slf4j.Slf4j;

@Slf4j
@Retryable
public class DecommissionNode extends EditUniverseTaskBase {

@Inject
protected DecommissionNode(BaseTaskDependencies baseTaskDependencies) {
super(baseTaskDependencies);
}

@Override
protected NodeTaskParams taskParams() {
return (NodeTaskParams) taskParams;
}

private void runBasicChecks(Universe universe) {
NodeDetails currentNode = universe.getNode(taskParams().nodeName);
if (isFirstTry()) {
currentNode.validateActionOnState(NodeActionType.DECOMMISSION);
}
}

@Override
public void validateParams(boolean isFirstTry) {
super.validateParams(isFirstTry);
runBasicChecks(getUniverse());
}

// Check that there is a place to move the tablets and if not, make sure there are no tablets
// assigned to this tserver. Otherwise, do not allow the remove node task to succeed.
public void performPrecheck() {
Universe universe = getUniverse();
NodeDetails currentNode = universe.getNode(taskParams().nodeName);

if (!isTabletMovementAvailable(taskParams().nodeName)) {
log.debug(
"Tablets have nowhere to move off of tserver on node: {}. Checking if there are still"
+ " tablets assigned to it. A healthy tserver should not be removed.",
currentNode.getNodeName());
// TODO: Move this into a subtask.
checkNoTabletsOnNode(universe, currentNode);
}
log.debug("Pre-check succeeded");
}

@Override
protected void createPrecheckTasks(Universe universe) {

NodeDetails currentNode = universe.getNode(taskParams().nodeName);
if (currentNode == null) {
if (isFirstTry()) {
String msg =
"No node " + taskParams().nodeName + " found in universe " + universe.getName();
log.error(msg);
throw new RuntimeException(msg);
} else {
// We might be here on a retry that actually deleted the node
// don't do anything in this case
return;
}
}

if (isFirstTry()) {
setToBeRemovedState(currentNode);
configureTaskParams(universe);
}

// Check again after locking.
runBasicChecks(getUniverse());
boolean alwaysWaitForDataMove =
confGetter.getConfForScope(getUniverse(), UniverseConfKeys.alwaysWaitForDataMove);
if (alwaysWaitForDataMove) {
performPrecheck();
}
addBasicPrecheckTasks();
}

@Override
public void run() {
log.info(
"Started {} task for node {} in univ uuid={}",
getName(),
taskParams().nodeName,
taskParams().getUniverseUUID());
checkUniverseVersion();

Universe universe = getUniverse();
if (universe.getNode(taskParams().nodeName) == null) {
log.info("No node found with name {}", taskParams().nodeName);
if (isFirstTry()) {
throw new RuntimeException(
String.format("Node %s appears to have already been deleted", taskParams().nodeName));
} else {
log.info("Completing task because no node {} found", taskParams().nodeName);
}
return;
}

universe =
lockAndFreezeUniverseForUpdate(
taskParams().expectedUniverseVersion, this::freezeUniverseInTxn);
try {
preTaskActions();

Cluster taskParamsCluster = taskParams().getClusterByNodeName(taskParams().nodeName);
NodeDetails currentNode = universe.getNode(taskParams().nodeName);
taskParams().azUuid = currentNode.azUuid;
taskParams().placementUuid = currentNode.placementUuid;

Set<NodeDetails> addedMasters = getAddedMasters();
Set<NodeDetails> removedMasters = getRemovedMasters();

// Update the cluster in memory.
universe
.getUniverseDetails()
.upsertCluster(
taskParamsCluster.userIntent,
taskParamsCluster.placementInfo,
taskParamsCluster.uuid);

log.info("Decommission: added masters {}, removed masters {}", addedMasters, removedMasters);

editCluster(
universe,
taskParams().clusters,
taskParamsCluster,
getNodesInCluster(taskParamsCluster.uuid, addedMasters),
getNodesInCluster(taskParamsCluster.uuid, removedMasters),
!addedMasters.isEmpty() || !removedMasters.isEmpty() /*updateMasters*/,
true /* force */);

createUpdateUniverseIntentTask(taskParamsCluster, true /*updatePlacementInfo*/);

// Mark universe task state to success
createMarkUniverseUpdateSuccessTasks().setSubTaskGroupType(SubTaskGroupType.RemovingNode);

// Run all the tasks.
getRunnableTask().runSubTasks();
} catch (Throwable t) {
log.error("Error executing task {} with error='{}'.", getName(), t.getMessage(), t);
throw t;
} finally {
unlockUniverseForUpdate();
}
log.info("Finished {} task.", getName());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,16 @@ protected void editCluster(

removeMasters.addAll(mastersToStop);

log.info(
"editCluster: nodesToBeRemoved {}, removeMasters: {}, tserversToBeRemoved: {}, newMasters:"
+ " {}, existingNodesToStartMasters: {}, mastersToStop: {}",
nodesToBeRemoved,
removeMasters,
tserversToBeRemoved,
newMasters,
existingNodesToStartMaster,
mastersToStop);

boolean isWaitForLeadersOnPreferred =
confGetter.getConfForScope(universe, UniverseConfKeys.ybEditWaitForLeadersOnPreferred);

Expand Down Expand Up @@ -531,4 +541,18 @@ public void createCheckCertificateConfigTask(
createCheckCertificateConfigTask(
clusters, nodes, rootCA, clientRootCA, enableClientToNodeEncrypt, null);
}

protected void setToBeRemovedState(NodeDetails currentNode) {
Set<NodeDetails> nodes = taskParams().nodeDetailsSet;
for (NodeDetails node : nodes) {
if (node.getNodeName() != null && node.getNodeName().equals(currentNode.getNodeName())) {
node.state = NodeState.ToBeRemoved;
return;
}
}
throw new RuntimeException(
String.format(
"Error setting node %s to ToBeRemoved state as node was not found",
currentNode.getNodeName()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,18 @@
import com.yugabyte.yw.commissioner.tasks.params.NodeTaskParams;
import com.yugabyte.yw.common.DnsManager;
import com.yugabyte.yw.common.NodeActionType;
import com.yugabyte.yw.common.PlacementInfoUtil;
import com.yugabyte.yw.common.config.GlobalConfKeys;
import com.yugabyte.yw.common.config.UniverseConfKeys;
import com.yugabyte.yw.forms.NodeActionFormData;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.Cluster;
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.UserIntent;
import com.yugabyte.yw.models.Universe;
import com.yugabyte.yw.models.helpers.CommonUtils;
import com.yugabyte.yw.models.helpers.NodeDetails;
import com.yugabyte.yw.models.helpers.NodeDetails.MasterState;
import com.yugabyte.yw.models.helpers.NodeDetails.NodeState;
import com.yugabyte.yw.models.helpers.PlacementInfo;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import javax.inject.Inject;
import lombok.extern.slf4j.Slf4j;
import org.yb.util.TabletServerInfo;

// Allows the removal of a node from a universe. Ensures the task waits for the right set of
// server data move primitives. And stops using the underlying instance, though YW still owns it.
Expand Down Expand Up @@ -87,6 +78,23 @@ public void validateParams(boolean isFirstTry) {
runBasicChecks(getUniverse());
}

// Check that there is a place to move the tablets and if not, make sure there are no tablets
// assigned to this tserver. Otherwise, do not allow the remove node task to succeed.
public void performPrecheck() {
Universe universe = getUniverse();
NodeDetails currentNode = universe.getNode(taskParams().nodeName);

if (!isTabletMovementAvailable(taskParams().nodeName)) {
log.debug(
"Tablets have nowhere to move off of tserver on node: {}. Checking if there are still"
+ " tablets assigned to it. A healthy tserver should not be removed.",
currentNode.getNodeName());
// TODO: Move this into a subtask.
checkNoTabletsOnNode(universe, currentNode);
}
log.debug("Pre-check succeeded");
}

@Override
protected void createPrecheckTasks(Universe universe) {
// Check again after locking.
Expand Down Expand Up @@ -151,7 +159,7 @@ public void run() {
Collections.singleton(currentNode), universe.getUniverseDetails().clusters)
.setSubTaskGroupType(SubTaskGroupType.WaitForDataMigration);

if (alwaysWaitForDataMove || isTabletMovementAvailable()) {
if (alwaysWaitForDataMove || isTabletMovementAvailable(taskParams().nodeName)) {
createWaitForDataMoveTask().setSubTaskGroupType(SubTaskGroupType.WaitForDataMigration);
}

Expand Down Expand Up @@ -214,91 +222,4 @@ public void run() {
}
log.info("Finished {} task.", getName());
}

private boolean isTabletMovementAvailable() {
Universe universe = getUniverse();
NodeDetails currentNode = universe.getNode(taskParams().nodeName);
String softwareVersion =
universe.getUniverseDetails().getPrimaryCluster().userIntent.ybSoftwareVersion;
if (CommonUtils.isReleaseBefore(CommonUtils.MIN_LIVE_TABLET_SERVERS_RELEASE, softwareVersion)) {
log.debug("ListLiveTabletServers is not supported for {} version", softwareVersion);
return true;
}

// taskParams().placementUuid is not used because it will be null for RR.
Cluster currCluster = universe.getUniverseDetails().getClusterByUuid(currentNode.placementUuid);
UserIntent userIntent = currCluster.userIntent;
PlacementInfo pi = currCluster.placementInfo;

Collection<NodeDetails> nodesExcludingCurrentNode =
new HashSet<>(universe.getNodesByCluster(currCluster.uuid));
nodesExcludingCurrentNode.remove(currentNode);
int rfInZone =
PlacementInfoUtil.getZoneRF(
pi,
currentNode.cloudInfo.cloud,
currentNode.cloudInfo.region,
currentNode.cloudInfo.az);

if (rfInZone == -1) {
log.error(
"Unexpected placement info in universe: {} rfInZone: {}", universe.getName(), rfInZone);
throw new RuntimeException(
"Error getting placement info for cluster with node: " + currentNode.nodeName);
}

// We do not get isActive() tservers due to new masters starting up changing
// nodeStates to not-active node states which will cause retry to fail.
// Note: On master leader failover, if a tserver was already down, it will not be reported as a
// "live" tserver even though it has been less than
// "follower_unavailable_considered_failed_sec" secs since the tserver was down. This is
// fine because we do not take into account the current node and if it is not the current
// node that is down we may prematurely fail, which is expected.
List<TabletServerInfo> liveTabletServers = getLiveTabletServers(universe);

List<TabletServerInfo> tserversActiveInAZExcludingCurrentNode =
liveTabletServers.stream()
.filter(
tserverInfo ->
currentNode.cloudInfo.cloud.equals(tserverInfo.getCloudInfo().getCloud())
&& currentNode.cloudInfo.region.equals(
tserverInfo.getCloudInfo().getRegion())
&& currentNode.cloudInfo.az.equals(tserverInfo.getCloudInfo().getZone())
&& currCluster.uuid.equals(tserverInfo.getPlacementUuid())
&& !currentNode.cloudInfo.private_ip.equals(
tserverInfo.getPrivateAddress().getHost()))
.collect(Collectors.toList());

long numActiveTservers = tserversActiveInAZExcludingCurrentNode.size();

// We have replication number of copies a tablet so we need more than the replication
// factor number of nodes for tablets to move off.
// We only want to move data if the number of nodes in the zone are more than or equal
// the RF of the zone.
log.debug(
"Cluster: {}, numNodes in cluster: {}, number of active tservers excluding current node"
+ " removing: {}, RF in az: {}",
currCluster.uuid,
userIntent.numNodes,
numActiveTservers,
rfInZone);
return userIntent.numNodes > userIntent.replicationFactor && numActiveTservers >= rfInZone;
}

// Check that there is a place to move the tablets and if not, make sure there are no tablets
// assigned to this tserver. Otherwise, do not allow the remove node task to succeed.
public void performPrecheck() {
Universe universe = getUniverse();
NodeDetails currentNode = universe.getNode(taskParams().nodeName);

if (!isTabletMovementAvailable()) {
log.debug(
"Tablets have nowhere to move off of tserver on node: {}. Checking if there are still"
+ " tablets assigned to it. A healthy tserver should not be removed.",
currentNode.getNodeName());
// TODO: Move this into a subtask.
checkNoTabletsOnNode(universe, currentNode);
}
log.debug("Pre-check succeeded");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import com.yugabyte.yw.forms.UniverseDefinitionTaskParams.Cluster;
import com.yugabyte.yw.models.Universe;
import com.yugabyte.yw.models.helpers.NodeDetails;
import com.yugabyte.yw.models.helpers.NodeDetails.NodeState;
import java.util.Set;
import javax.inject.Inject;
import lombok.extern.slf4j.Slf4j;
Expand Down Expand Up @@ -131,18 +130,4 @@ public void run() {
log.info("Finished {} task.", getName());
}
}

private void setToBeRemovedState(NodeDetails currentNode) {
Set<NodeDetails> nodes = taskParams().nodeDetailsSet;
for (NodeDetails node : nodes) {
if (node.getNodeName() != null && node.getNodeName().equals(currentNode.getNodeName())) {
node.state = NodeState.ToBeRemoved;
return;
}
}
throw new RuntimeException(
String.format(
"Error setting node %s to ToBeRemoved state as node was not found",
currentNode.getNodeName()));
}
}
Loading

0 comments on commit 8506386

Please sign in to comment.