Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion helix-admin-webapp/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
<dependency>
<groupId>com.thoughtworks.xstream</groupId>
<artifactId>xstream</artifactId>
<version>1.4.21</version>
<version>1.4.19</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
Expand Down
18 changes: 15 additions & 3 deletions helix-core/src/main/java/org/apache/helix/HelixAdmin.java
Original file line number Diff line number Diff line change
Expand Up @@ -800,13 +800,25 @@ Map<String, Boolean> validateInstancesForWagedRebalance(String clusterName,
/**
* Return if instance operation 'Evacuate' is finished.
* @param clusterName
* @param instancesNames
* @return Return true if there is no current state nor pending message on the instance.
* @param instancesName
* @return Return true if the instance has no FULL_AUTO or CUSTOMIZED resources in its current
* state and no pending messages.
*/
default boolean isEvacuateFinished(String clusterName, String instancesNames) {
default boolean isEvacuateFinished(String clusterName, String instancesName) {
throw new UnsupportedOperationException("isEvacuateFinished is not implemented.");
}

/**
 * Check whether the given instance has been drained.
 * <p>
 * This default implementation is only a placeholder and always throws; implementations that
 * support the check are expected to override it.
 * @param clusterName name of the cluster the instance belongs to
 * @param instanceName name of the instance to check
 * @return true if the instance has no FULL_AUTO or CUSTOMIZED resources in its current state
 *         and no pending messages
 * @throws UnsupportedOperationException always, unless the method is overridden
 */
default boolean isInstanceDrained(String clusterName, String instanceName) {
  final String notImplemented = "isInstanceDrained is not implemented.";
  throw new UnsupportedOperationException(notImplemented);
}

/**
* Check to see if swapping between two instances can be completed. Either the swapOut or
* swapIn instance can be passed in.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@
import java.util.Set;

import org.apache.helix.HelixDefinedState;
import org.apache.helix.constants.InstanceConstants;
import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider;
import org.apache.helix.controller.stages.CurrentStateOutput;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.model.LiveInstance;
import org.apache.helix.model.Partition;
import org.apache.helix.model.Resource;
Expand Down Expand Up @@ -132,10 +134,14 @@ private Map<String, String> computeCustomizedBestStateForPartition(
boolean notInErrorState = currentStateMap != null
&& !HelixDefinedState.ERROR.toString().equals(currentStateMap.get(instance));
boolean enabled = !disabledInstancesForPartition.contains(instance) && isResourceEnabled;

InstanceConfig instanceConfig = cache.getInstanceConfigMap().get(instance);
boolean hasEvacuatedOp = instanceConfig != null &&
instanceConfig.getInstanceOperation().getOperation() == InstanceConstants.InstanceOperation.EVACUATE;
boolean isAssignableForCustomizedResource = cache.getLiveInstances().containsKey(instance) && hasEvacuatedOp;
Comment on lines +137 to +140
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we leave the custom rebalancer out of this change for now? I think this requires a comprehensive design discussion with custom-rebalancer users in the open-source community.

// Note: if instance is not live, the mapping for that instance will not show up in
// BestPossibleMapping (and ExternalView)
if (assignableLiveInstancesMap.containsKey(instance) && notInErrorState) {
// if instance is evacuated keep the instanceStateMap same as idealStateMap
if ((assignableLiveInstancesMap.containsKey(instance) || isAssignableForCustomizedResource) && notInErrorState) {
if (enabled) {
instanceStateMap.put(instance, idealStateMap.get(instance));
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -460,12 +460,17 @@ public void setInstanceOperation(String clusterName, String instanceName,

@Override
public boolean isEvacuateFinished(String clusterName, String instanceName) {
if (!instanceHasFullAutoCurrentStateOrMessage(clusterName, instanceName)) {
InstanceConfig config = getInstanceConfig(clusterName, instanceName);
return config != null && config.getInstanceOperation().getOperation()
.equals(InstanceConstants.InstanceOperation.EVACUATE);
InstanceConfig config = getInstanceConfig(clusterName, instanceName);
if (config == null || config.getInstanceOperation().getOperation() !=
InstanceConstants.InstanceOperation.EVACUATE ) {
return false;
}
return false;
return !instanceHasCurrentStateOrMessage(clusterName, instanceName);
}

/**
 * Reports whether the instance is drained, i.e. the negation of
 * {@code instanceHasCurrentStateOrMessage} for the same cluster and instance.
 * @param clusterName cluster the instance belongs to
 * @param instanceName instance to check
 * @return true when the instance has neither a tracked current state nor a pending message
 */
@Override
public boolean isInstanceDrained(String clusterName, String instanceName) {
  boolean hasStateOrMessage = instanceHasCurrentStateOrMessage(clusterName, instanceName);
  return !hasStateOrMessage;
}

/**
Expand Down Expand Up @@ -721,7 +726,7 @@ public boolean completeSwapIfPossible(String clusterName, String instanceName,

@Override
public boolean isReadyForPreparingJoiningCluster(String clusterName, String instanceName) {
if (!instanceHasFullAutoCurrentStateOrMessage(clusterName, instanceName)) {
if (!instanceHasCurrentStateOrMessage(clusterName, instanceName)) {
InstanceConfig config = getInstanceConfig(clusterName, instanceName);
return config != null && INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT.contains(
config.getInstanceOperation().getOperation());
Expand Down Expand Up @@ -757,13 +762,14 @@ public boolean forceKillInstance(String clusterName, String instanceName, String
}

/**
* Return true if Instance has any current state or pending message. Otherwise, return false if instance is offline,
* Return true if instance has any resource with FULL_AUTO or CUSTOMIZED rebalance mode in current state or
* if instance has any pending message. Otherwise, return false if instance is offline,
* instance has no active session, or if instance is online but has no current state or pending message.
* @param clusterName
* @param instanceName
* @return
*/
private boolean instanceHasFullAutoCurrentStateOrMessage(String clusterName,
private boolean instanceHasCurrentStateOrMessage(String clusterName,
String instanceName) {
HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, _baseDataAccessor);
PropertyKey.Builder keyBuilder = accessor.keyBuilder();
Expand Down Expand Up @@ -803,13 +809,14 @@ private boolean instanceHasFullAutoCurrentStateOrMessage(String clusterName,
return true;
}

// Get set of FULL_AUTO resources
// Get set of FULL_AUTO and CUSTOMIZED resources
List<IdealState> idealStates = accessor.getChildValues(keyBuilder.idealStates(), true);
Set<String> fullAutoResources = idealStates != null ? idealStates.stream()
.filter(idealState -> idealState.getRebalanceMode() == RebalanceMode.FULL_AUTO)
Set<String> resources = idealStates != null ? idealStates.stream()
.filter(idealState -> idealState.getRebalanceMode() == RebalanceMode.FULL_AUTO ||
idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED)
.map(IdealState::getResourceName).collect(Collectors.toSet()) : Collections.emptySet();

return currentStates.stream().anyMatch(fullAutoResources::contains);
return currentStates.stream().anyMatch(resources::contains);
}

@Override
Expand Down
33 changes: 33 additions & 0 deletions helix-core/src/test/java/org/apache/helix/common/ZkTestBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.stream.Collectors;
import javax.management.MBeanServerConnection;
import javax.management.ObjectName;

import org.apache.helix.BaseDataAccessor;
import org.apache.helix.ConfigAccessor;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixConstants;
import org.apache.helix.HelixDataAccessor;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixProperty;
Expand Down Expand Up @@ -75,6 +77,7 @@
import org.apache.helix.tools.ClusterStateVerifier;
import org.apache.helix.tools.StateModelConfigGenerator;
import org.apache.helix.zookeeper.api.client.HelixZkClient;
import org.apache.helix.zookeeper.api.client.RealmAwareZkClient;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.helix.zookeeper.impl.client.ZkClient;
import org.apache.helix.zookeeper.impl.factory.DedicatedZkClientFactory;
Expand Down Expand Up @@ -447,6 +450,36 @@ protected void createDBInSemiAuto(ClusterSetup clusterSetup, String clusterName,
clusterSetup.getClusterManagementTool().setResourceIdealState(clusterName, dbName, is);
}

/**
 * Adds a CUSTOMIZED-mode resource that uses the OnlineOffline state model, with one partition
 * per map entry placed ONLINE on the mapped instance.
 * @param clusterSetup setup tool used to add the resource to the cluster
 * @param clusterName cluster that receives the resource
 * @param resourceName name of the resource; partitions are named {@code <resourceName>_<id>}
 * @param partitionInstanceMap partition id mapped to the instance that should host it
 */
protected void createResourceInCustomizedMode(ClusterSetup clusterSetup, String clusterName, String resourceName,
    Map<Integer, String> partitionInstanceMap) {
  IdealState customizedIdealState = new IdealState(resourceName);
  customizedIdealState.setNumPartitions(partitionInstanceMap.size());
  customizedIdealState.setStateModelDefRef(OnlineOfflineSMD.name);
  customizedIdealState.setRebalanceMode(IdealState.RebalanceMode.CUSTOMIZED);

  // Every listed partition gets a single ONLINE replica on its mapped instance.
  for (Map.Entry<Integer, String> placement : partitionInstanceMap.entrySet()) {
    String partitionName = resourceName + "_" + placement.getKey();
    customizedIdealState.setPartitionState(partitionName, placement.getValue(),
        OnlineOfflineSMD.States.ONLINE.toString());
  }

  clusterSetup.addResourceToCluster(clusterName, resourceName, customizedIdealState);
}

/**
 * Deletes the participant's current-state znodes for every resource that is not listed in
 * {@code excludeResourceNames}.
 * <p>
 * Only the first session child found under the instance's current-state path is processed.
 * NOTE(review): {@code get(0)} throws if that path has no session child; confirm callers only
 * invoke this for participants that already have a session.
 * @param participant participant whose current states are removed
 * @param excludeResourceNames resource names whose current state must be kept
 */
protected void removeAllResourcesFromInstance(MockParticipantManager participant, Set<String> excludeResourceNames) {
  RealmAwareZkClient zkClient = participant.getZkClient();
  String clusterName = participant.getClusterName();
  String instanceName = participant.getInstanceName();

  String currentStateRoot = PropertyPathBuilder.instanceCurrentState(clusterName, instanceName);
  String sessionId = zkClient.getChildren(currentStateRoot).get(0);
  String sessionPath = PropertyPathBuilder.instanceCurrentState(clusterName, instanceName, sessionId);

  for (String resourceName : zkClient.getChildren(sessionPath)) {
    if (excludeResourceNames.contains(resourceName)) {
      continue;
    }
    zkClient.delete(
        PropertyPathBuilder.instanceCurrentState(clusterName, instanceName, sessionId, resourceName));
  }
}

/**
* Validate there should be always minimal active replica and top state replica for each
* partition.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.helix.controller.rebalancer.CustomRebalancer;
import org.apache.helix.controller.stages.CurrentStateOutput;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.model.LiveInstance;
import org.apache.helix.model.OnlineOfflineSMD;
import org.apache.helix.model.Partition;
Expand Down Expand Up @@ -70,7 +71,7 @@ public void testDisabledBootstrappingPartitions() {
.thenReturn(ImmutableSet.of(instanceName));
when(cache.getAssignableLiveInstances())
.thenReturn(ImmutableMap.of(instanceName, new LiveInstance(instanceName)));

when(cache.getInstanceConfigMap()).thenReturn(ImmutableMap.of(instanceName, new InstanceConfig(instanceName)));
CurrentStateOutput currOutput = new CurrentStateOutput();
ResourceAssignment resourceAssignment =
customRebalancer.computeBestPossiblePartitionState(cache, idealState, resource, currOutput);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,33 @@ public void testEvacuate() throws Exception {
Assert.assertEquals(getEVs(), assignment);
}

/**
 * Verifies that evacuation is not reported as finished for an instance that hosts a partition
 * of a CUSTOMIZED resource.
 * <p>
 * The test drops the default DBs, places a single-partition CUSTOMIZED resource on the first
 * participant, enters maintenance mode, marks that participant EVACUATE and asserts that
 * {@code isEvacuateFinished} returns false. Cluster state is restored in a {@code finally}
 * block so that a failed assertion does not leave the shared cluster in maintenance mode or
 * without the default test DBs.
 */
@Test
public void testEvacuateWithCustomizedResource() throws Exception {
  System.out.println("START TestInstanceOperation.testEvacuateWithCustomizedResource() at " + new Date(System.currentTimeMillis()));
  for (String resource : _allDBs) {
    _gSetupTool.dropResourceFromCluster(CLUSTER_NAME, resource);
  }
  Assert.assertTrue(_clusterVerifier.verifyByPolling());

  String instanceToEvacuate = _participants.get(0).getInstanceName();
  String customizedDB = "CustomizedTestDB";
  Map<Integer, String> partitionInstanceMap = new HashMap<>();
  partitionInstanceMap.put(0, instanceToEvacuate);
  createResourceInCustomizedMode(_gSetupTool, CLUSTER_NAME, customizedDB, partitionInstanceMap);

  // Tracks whether maintenance mode was actually entered, so cleanup only exits it if needed.
  boolean maintenanceModeEnabled = false;
  try {
    Assert.assertTrue(_clusterVerifier.verifyByPolling());
    _gSetupTool.getClusterManagementTool()
        .manuallyEnableMaintenanceMode(CLUSTER_NAME, true, null, null);
    maintenanceModeEnabled = true;

    // Mark the instance as evacuating while it still hosts the customized partition.
    _gSetupTool.getClusterManagementTool()
        .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.EVACUATE);
    Assert.assertTrue(_clusterVerifier.verifyByPolling());
    Assert.assertFalse(_admin.isEvacuateFinished(CLUSTER_NAME, instanceToEvacuate));
  } finally {
    if (maintenanceModeEnabled) {
      _gSetupTool.getClusterManagementTool()
          .manuallyEnableMaintenanceMode(CLUSTER_NAME, false, null, null);
    }
    // Drop the customized DB in the cluster and restore the default test DBs.
    _gSetupTool.dropResourceFromCluster(CLUSTER_NAME, customizedDB);
    createTestDBs(DEFAULT_RESOURCE_DELAY_TIME);
  }
}

@Test(dependsOnMethods = "testEvacuate")
public void testRevertEvacuation() throws Exception {
System.out.println("START TestInstanceOperation.testRevertEvacuation() at " + new Date(System.currentTimeMillis()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,11 @@ public boolean isEvacuateFinished(String clusterName, String instancesNames) {
return false;
}

// Stub implementation: unconditionally reports the instance as not drained, ignoring both arguments.
@Override
public boolean isInstanceDrained(String clusterName, String instancesNames) {
return false;
}

@Override
public boolean canCompleteSwap(String clusterName, String instancesNames) {
return false;
Expand Down
2 changes: 1 addition & 1 deletion helix-rest/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
<dependency>
<groupId>com.thoughtworks.xstream</groupId>
<artifactId>xstream</artifactId>
<version>1.4.21</version>
<version>1.4.19</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ public enum Command {
completeSwapIfPossible,
onDemandRebalance,
isEvacuateFinished,
isInstanceDrained,
setPartitionsToError,
forceKillInstance
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,16 @@ public Response updateInstance(@PathParam("clusterId") String clusterId,
return serverError(e);
}
return OK(OBJECT_MAPPER.writeValueAsString(ImmutableMap.of("successful", evacuateFinished)));
case isInstanceDrained:
  // Ask the admin whether the instance is drained and return the answer under "successful".
  boolean instanceDrained;
  try {
    instanceDrained = admin.isInstanceDrained(clusterId, instanceName);
  } catch (HelixException e) {
    // String.format only substitutes %-style specifiers; the previous "{}" placeholders were
    // logged literally and the cluster/instance names were dropped from the message.
    LOG.error(String.format("Encountered error when checking if instance is drained for cluster: "
        + "%s, instance: %s", clusterId, instanceName), e);
    return serverError(e);
  }
  return OK(OBJECT_MAPPER.writeValueAsString(ImmutableMap.of("successful", instanceDrained)));
case forceKillInstance:
boolean instanceForceKilled = admin.forceKillInstance(clusterId, instanceName, reason, instanceOperationSource);
if (!instanceForceKilled) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,12 @@ public void updateInstance() throws Exception {
Assert.assertEquals(response.getStatus(), Response.Status.OK.getStatusCode());
Assert.assertTrue(evacuateFinishedResult.get("successful"));

response = new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=isInstanceDrained")
.format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity);
Map<String, Boolean> instanceDrainedResult = OBJECT_MAPPER.readValue(response.readEntity(String.class), Map.class);
Assert.assertEquals(response.getStatus(), Response.Status.OK.getStatusCode());
Assert.assertTrue(instanceDrainedResult.get("successful"));

// test isEvacuateFinished on instance with EVACUATE and no currentState
// Create new instance so no currentState or messages assigned to it
String test_instance_name = INSTANCE_NAME + "_foo";
Expand Down