-
Notifications
You must be signed in to change notification settings - Fork 594
HDDS-14108. Provide option in ‘scm safemode status’ to show status of all SCM nodes #9611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
14173ff
5958634
4b95b95
dc935e6
5fbe051
8dd43f6
836c36c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,10 +18,19 @@ | |
| package org.apache.hadoop.hdds.scm.cli; | ||
|
|
||
| import java.io.IOException; | ||
| import java.net.InetSocketAddress; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.stream.Collectors; | ||
| import org.apache.commons.lang3.StringUtils; | ||
| import org.apache.commons.lang3.tuple.Pair; | ||
| import org.apache.hadoop.hdds.HddsUtils; | ||
| import org.apache.hadoop.hdds.cli.HddsVersionProvider; | ||
| import org.apache.hadoop.hdds.conf.OzoneConfiguration; | ||
| import org.apache.hadoop.hdds.scm.client.ScmClient; | ||
| import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo; | ||
| import org.apache.hadoop.net.NetUtils; | ||
| import picocli.CommandLine; | ||
| import picocli.CommandLine.Command; | ||
|
|
||
| /** | ||
|
|
@@ -33,9 +42,26 @@ | |
| mixinStandardHelpOptions = true, | ||
| versionProvider = HddsVersionProvider.class) | ||
| public class SafeModeCheckSubcommand extends ScmSubcommand { | ||
| @CommandLine.Option(names = {"--all", "-a"}, | ||
| description = "Show safe mode status for all SCM nodes in the service. " + | ||
| "When multiple SCM service IDs are configured, --service-id must be specified.") | ||
| private boolean allNodes; | ||
|
|
||
| @Override | ||
| public void execute(ScmClient scmClient) throws IOException { | ||
| final OzoneConfiguration conf = getOzoneConf(); | ||
| String serviceId = HddsUtils.getScmServiceId(conf); | ||
|
|
||
| if (allNodes) { | ||
| executeForAllNodes(scmClient); | ||
| } else if (StringUtils.isNotEmpty(getScmOption().getScm()) && serviceId != null) { | ||
| executeForSpecificNodeInHA(scmClient, serviceId); | ||
| } else { | ||
| executeForSingleNode(scmClient); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the normal (existing) behaviour, we need the safe mode status from the leader node most of the time. When no SCM address is passed, are we getting the safe mode status from the leader node or not? I ask because a follower can now also accept the safe mode request and return the status.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks @ashishkumar50 for finding this bug. You are right: now that we allow followers to also accept the status command, running the safemode status command with no additional option could return the status of a follower. I have fixed this issue. |
||
| } | ||
| } | ||
|
|
||
| private void executeForSingleNode(ScmClient scmClient) throws IOException { | ||
| boolean execReturn = scmClient.inSafeMode(); | ||
|
|
||
| // Output data list | ||
|
|
@@ -45,12 +71,106 @@ public void execute(ScmClient scmClient) throws IOException { | |
| System.out.println("SCM is out of safe mode."); | ||
| } | ||
| if (isVerbose()) { | ||
| for (Map.Entry<String, Pair<Boolean, String>> entry : | ||
| scmClient.getSafeModeRuleStatuses().entrySet()) { | ||
| Pair<Boolean, String> value = entry.getValue(); | ||
| System.out.printf("validated:%s, %s, %s%n", | ||
| value.getLeft(), entry.getKey(), value.getRight()); | ||
| printSafeModeRules(scmClient.getSafeModeRuleStatuses()); | ||
| } | ||
| } | ||
|
|
||
/**
 * Reports the safe mode status of the single SCM node selected by the --scm address.
 *
 * Prints the service ID, builds the node list from the Ozone configuration, keeps the
 * nodes for which matchesAddress(node, scmAddress) is true, and queries the first match.
 *
 * @param scmClient client passed through to queryNode
 * @param serviceId SCM service ID; printed and included in the error message
 * @throws IOException if the --scm address matches none of the configured nodes
 */
| private void executeForSpecificNodeInHA(ScmClient scmClient, String serviceId) throws IOException { | ||
| String scmAddress = getScmOption().getScm(); | ||
|
||
|
|
||
| System.out.println("Service ID: " + serviceId); | ||
|
|
||
| final OzoneConfiguration conf = getOzoneConf(); | ||
|
|
||
| List<SCMNodeInfo> nodes = SCMNodeInfo.buildNodeInfo(conf); | ||
|
|
||
| // Find the node matching the --scm address | ||
| List<SCMNodeInfo> matchedNodes = nodes.stream() | ||
| .filter(node -> matchesAddress(node, scmAddress)) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| if (matchedNodes.isEmpty()) { | ||
| throw new IOException("Specified --scm address " + scmAddress + | ||
| " does not match any node in service " + serviceId + | ||
| ". Available nodes: " + nodes.stream() | ||
| .map(n -> n.getScmClientAddress() + " [" + n.getNodeId() + "]") | ||
| .collect(Collectors.joining(", "))); | ||
| } | ||
|
|
||
/* Only the first matching node is queried, even if several nodes matched. */
| queryNode(scmClient, matchedNodes.get(0)); | ||
| } | ||
|
|
||
/**
 * Reports the safe mode status of every node returned by SCMNodeInfo.buildNodeInfo.
 *
 * When no SCM service ID is found in the configuration, this falls back to
 * executeForSingleNode and returns. Otherwise it prints the service ID and calls
 * queryNode once per node, in list order.
 *
 * @param scmClient client passed through to the per-node query
 * @throws IOException may propagate from the executeForSingleNode fallback; per-node
 *         failures are caught and printed inside queryNode rather than thrown
 */
| private void executeForAllNodes(ScmClient scmClient) throws IOException { | ||
| final OzoneConfiguration conf = getOzoneConf(); | ||
| String serviceId = HddsUtils.getScmServiceId(conf); | ||
|
|
||
| if (serviceId == null) { | ||
| executeForSingleNode(scmClient); | ||
| return; | ||
| } | ||
|
|
||
| System.out.println("Service ID: " + serviceId); | ||
| List<SCMNodeInfo> nodes = SCMNodeInfo.buildNodeInfo(conf); | ||
|
|
||
| for (SCMNodeInfo node : nodes) { | ||
| queryNode(scmClient, node); | ||
| } | ||
| } | ||
|
|
||
/**
 * Queries one SCM node for its safe mode state and prints the result.
 *
 * Output line format: "<client address> [<node id>]: IN SAFE MODE" or
 * "... OUT OF SAFE MODE". In verbose mode the node's safe mode rule statuses are also
 * printed, but only when the returned map is non-null and non-empty.
 *
 * Any exception is caught here and printed as an "ERROR: <message>" line for this node
 * instead of being rethrown, so a caller looping over several nodes continues with the
 * next one.
 *
 * @param scmClient client used for the per-node safe mode calls
 * @param node node whose ID is queried and whose client address is printed
 */
| private void queryNode(ScmClient scmClient, SCMNodeInfo node) { | ||
| String nodeId = node.getNodeId(); | ||
|
|
||
| try { | ||
| boolean inSafeMode = scmClient.inSafeModeForNode(nodeId); | ||
|
|
||
| System.out.printf("%s [%s]: %s%n", | ||
| node.getScmClientAddress(), | ||
| nodeId, | ||
| inSafeMode ? "IN SAFE MODE" : "OUT OF SAFE MODE"); | ||
|
|
||
| if (isVerbose()) { | ||
| Map<String, Pair<Boolean, String>> rules = scmClient.getSafeModeRuleStatusesForNode(nodeId); | ||
| if (rules != null && !rules.isEmpty()) { | ||
| printSafeModeRules(rules); | ||
| } | ||
| } | ||
| } catch (Exception e) { | ||
| System.out.printf("%s [%s]: ERROR: %s%n", | ||
| node.getScmClientAddress(), nodeId, e.getMessage()); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Check if the given SCMNodeInfo matches the target address. | ||
| * Tries to match by direct string comparison and by resolved address. | ||
| */ | ||
| private boolean matchesAddress(SCMNodeInfo node, String targetAddress) { | ||
| String nodeAddress = node.getScmClientAddress(); | ||
|
|
||
| // Direct match | ||
| if (nodeAddress.equals(targetAddress)) { | ||
| return true; | ||
| } | ||
|
|
||
| // Try normalizing both addresses and comparing | ||
| try { | ||
| InetSocketAddress target = NetUtils.createSocketAddr(targetAddress); | ||
| InetSocketAddress nodeAddr = NetUtils.createSocketAddr(nodeAddress); | ||
|
|
||
| // Match by resolved IP and port | ||
| return target.getPort() == nodeAddr.getPort() && | ||
| target.getAddress().equals(nodeAddr.getAddress()); | ||
| } catch (Exception e) { | ||
| // If address resolution fails, no match | ||
| return false; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: Log the exception here before returning false.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have removed the logging here because it creates unwanted noise in the CLI output. Instead, I have ensured that actual errors are properly surfaced: when no leader can be determined, or when the node specified in the --scm option doesn't match, a clear error message is shown to the user. |
||
| } | ||
| } | ||
|
|
||
/**
 * Prints one line per entry of the given safe mode rule map, in the form
 * "validated:<left>, <key>, <right>".
 *
 * @param rules map from rule key to a pair; the pair's left (Boolean) value is printed
 *        after "validated:" and its right (String) value is printed last
 */
| private void printSafeModeRules(Map<String, Pair<Boolean, String>> rules) { | ||
| for (Map.Entry<String, Pair<Boolean, String>> entry : rules.entrySet()) { | ||
| Pair<Boolean, String> value = entry.getValue(); | ||
| System.out.printf("validated:%s, %s, %s%n", | ||
| value.getLeft(), entry.getKey(), value.getRight()); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: FOLLOWER_READABLE_COMMAND_TYPES