From 01f057e4b0696c576d400c0a7378d2bdabfe8692 Mon Sep 17 00:00:00 2001 From: Zhijun Date: Sun, 26 Oct 2025 20:38:58 +0800 Subject: [PATCH 1/6] Cluters: Assign default human-readable nodename when starting server Signed-off-by: Zhijun --- src/cluster_legacy.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 22e2436d21..31d6fe453d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1215,7 +1215,11 @@ static void updateAnnouncedHostname(clusterNode *node, char *value) { } static void updateAnnouncedHumanNodename(clusterNode *node, char *value) { - updateSdsExtensionField(&node->human_nodename, value); + /* We should only update the human codename when the provided new + * value isn't NULL, otherwise the following function will clear + * the human codename field. */ + if (value != NULL) + updateSdsExtensionField(&node->human_nodename, value); } static void updateAnnouncedClientIpV4(clusterNode *node, char *value) { @@ -1284,6 +1288,13 @@ static void updateShardId(clusterNode *node, const char *shard_id) { } } +static void updateHumanNodenameToAddress(clusterNode *node) { + const int port = server.tls_cluster? node->tls_port : node->tcp_port; + char buf[64]; + snprintf(buf, sizeof(buf), "%s:%d", node->ip, port); + updateAnnouncedHumanNodename(node, buf); +} + static inline int areInSameShard(clusterNode *node1, clusterNode *node2) { return memcmp(node1->shard_id, node2->shard_id, CLUSTER_NAMELEN) == 0; } @@ -1408,7 +1419,17 @@ void clusterInit(void) { clusterUpdateMyselfClientIpV4(); clusterUpdateMyselfClientIpV6(); clusterUpdateMyselfHostname(); - clusterUpdateMyselfHumanNodename(); + /* Assigning human-readable nodename at start-up makes it much + * easier to read the server logs. By default, a server doesn't + * have a human-readable nodename unless explicting assigned by + * CONFIG SET command or config file edit, so we simply use the + * the server's IP and port as the nodename. This name wil be + * carried in the PING extension so that all nodes in the cluster + * will know this name eventually. */ + if (server.cluster_announce_human_nodename != NULL && server.cluster_announce_human_nodename[0] != '\0') + clusterUpdateMyselfHumanNodename(); + else + updateHumanNodenameToAddress(myself); resetClusterStats(); } @@ -5823,7 +5844,7 @@ void clusterCron(void) { long long cluster_node_conn_attempts = maxConnectionAttemptsPerCron(); while ((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); - /* We free the inbound or outboud link to the node if the link has an + /* We free the inbound or outbound link to the node if the link has an * oversized message send queue and immediately try reconnecting. */ clusterNodeCronFreeLinkOnBufferLimitReached(node); /* The protocol is that function(s) below return non-zero if the node was From 7b36df885472716b8b279a507c392c90f797e7b5 Mon Sep 17 00:00:00 2001 From: Zhijun Date: Sun, 26 Oct 2025 20:45:32 +0800 Subject: [PATCH 2/6] Formatting Signed-off-by: Zhijun --- src/cluster_legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 31d6fe453d..5abeb418d0 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1289,7 +1289,7 @@ static void updateShardId(clusterNode *node, const char *shard_id) { } static void updateHumanNodenameToAddress(clusterNode *node) { - const int port = server.tls_cluster? node->tls_port : node->tcp_port; + const int port = server.tls_cluster ? node->tls_port : node->tcp_port; char buf[64]; snprintf(buf, sizeof(buf), "%s:%d", node->ip, port); updateAnnouncedHumanNodename(node, buf); @@ -1426,7 +1426,8 @@ void clusterInit(void) { * the server's IP and port as the nodename. This name wil be * carried in the PING extension so that all nodes in the cluster * will know this name eventually. */ - if (server.cluster_announce_human_nodename != NULL && server.cluster_announce_human_nodename[0] != '\0') + if (server.cluster_announce_human_nodename != NULL && + server.cluster_announce_human_nodename[0] != '\0') clusterUpdateMyselfHumanNodename(); else updateHumanNodenameToAddress(myself); From e6b481345139baaa5061a427f664d64774e8b2bb Mon Sep 17 00:00:00 2001 From: Zhijun Date: Sun, 26 Oct 2025 20:54:43 +0800 Subject: [PATCH 3/6] Typos Signed-off-by: Zhijun --- src/cluster_legacy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 5abeb418d0..03ba2d6e07 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1215,9 +1215,9 @@ static void updateAnnouncedHostname(clusterNode *node, char *value) { } static void updateAnnouncedHumanNodename(clusterNode *node, char *value) { - /* We should only update the human codename when the provided new + /* We should only update the human nodename when the provided new * value isn't NULL, otherwise the following function will clear - * the human codename field. */ + * the human nodename field. */ if (value != NULL) updateSdsExtensionField(&node->human_nodename, value); } @@ -1423,7 +1423,7 @@ void clusterInit(void) { * easier to read the server logs. By default, a server doesn't * have a human-readable nodename unless explicting assigned by * CONFIG SET command or config file edit, so we simply use the - * the server's IP and port as the nodename. This name wil be + * the server's IP and port as the nodename. This name will be * carried in the PING extension so that all nodes in the cluster * will know this name eventually. */ if (server.cluster_announce_human_nodename != NULL && From 987ebcf37b476ee94eb7ef82d24379863a36bd7c Mon Sep 17 00:00:00 2001 From: Zhijun Date: Mon, 27 Oct 2025 12:39:16 +0800 Subject: [PATCH 4/6] Add ports to tests; Update nodename when IP changes Signed-off-by: Zhijun --- src/cluster_legacy.c | 5 +++-- tests/support/util.tcl | 2 +- tests/unit/cluster/manual-failover.tcl | 13 ++++++++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 03ba2d6e07..e8612ebb57 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1421,8 +1421,8 @@ void clusterInit(void) { clusterUpdateMyselfHostname(); /* Assigning human-readable nodename at start-up makes it much * easier to read the server logs. By default, a server doesn't - * have a human-readable nodename unless explicting assigned by - * CONFIG SET command or config file edit, so we simply use the + * have a human-readable nodename unless explicitly assigned by + * CONFIG SET command or config file edit, so we simply use * the server's IP and port as the nodename. This name will be * carried in the PING extension so that all nodes in the cluster * will know this name eventually. */ @@ -3768,6 +3768,7 @@ int clusterProcessPacket(clusterLink *link) { memcpy(myself->ip, ip, NET_IP_STR_LEN); serverLog(LL_NOTICE, "IP address for this node updated to %s", myself->ip); clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); + updateHumanNodenameToAddress(myself); } } diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 5a001c04a8..c7e8e58101 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -201,7 +201,7 @@ proc verify_log_message {srv_idx pattern from_line} { incr from_line set result [exec tail -n +$from_line < [srv $srv_idx stdout]] if {![string match $pattern $result]} { - fail "expected message not found in log file: $pattern" + fail "expected pattern found in log file: $pattern, but instead got: $result" } } diff --git a/tests/unit/cluster/manual-failover.tcl b/tests/unit/cluster/manual-failover.tcl index 56952ff008..a585ba0127 100644 --- a/tests/unit/cluster/manual-failover.tcl +++ b/tests/unit/cluster/manual-failover.tcl @@ -418,6 +418,9 @@ start_cluster 3 1 {tags {external:skip cluster}} { set R2_nodeid [R 2 cluster myid] set R3_nodeid [R 3 cluster myid] + set R0_port [srv 0 port] + set R3_port [srv -3 port] + set R0_shardid [R 0 cluster myshardid] set R3_shardid [R 3 cluster myshardid] assert_equal $R0_shardid $R3_shardid @@ -478,21 +481,21 @@ start_cluster 3 1 {tags {external:skip cluster}} { # verify we print the logs. # Both importing slots and migrating slots are move to R3. - set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid () in shard $R3_shardid*" + set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid (127.0.0.1:$R3_port) in shard $R3_shardid*" verify_log_message -1 $pattern $loglines1 - set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid () in shard $R3_shardid*" + set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid (127.0.0.1:$R3_port) in shard $R3_shardid*" verify_log_message -1 $pattern $loglines1 # Both slots are move to R3. set R0_slots 5462 - set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid () lost $R0_slots slot(s) and failed over to node $R3_nodeid*" + set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid (127.0.0.1:$R0_port) lost $R0_slots slot(s) and failed over to node $R3_nodeid*" verify_log_message -1 $pattern $loglines1 verify_log_message -2 $pattern $loglines2 # Both importing slots and migrating slots are move to R3. - set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*" + set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid (127.0.0.1:$R3_port) in shard $R3_shardid*" verify_log_message -1 $pattern $loglines1 - set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*" + set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid (127.0.0.1:$R3_port) in shard $R3_shardid*" verify_log_message -1 $pattern $loglines1 R 1 debug disable-cluster-reconnection 0 From 236c36dd208ccef329472fa44b066dd41d57f89a Mon Sep 17 00:00:00 2001 From: Zhijun Date: Wed, 29 Oct 2025 07:59:21 +0800 Subject: [PATCH 5/6] Stick to human nodename if set by user Signed-off-by: Zhijun --- src/cluster_legacy.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index e8612ebb57..0c2563e789 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1426,8 +1426,8 @@ void clusterInit(void) { * the server's IP and port as the nodename. This name will be * carried in the PING extension so that all nodes in the cluster * will know this name eventually. */ - if (server.cluster_announce_human_nodename != NULL && - server.cluster_announce_human_nodename[0] != '\0') + if (server.cluster_announce_human_nodename == NULL || + server.cluster_announce_human_nodename[0] == '\0') clusterUpdateMyselfHumanNodename(); else updateHumanNodenameToAddress(myself); @@ -3768,7 +3768,9 @@ int clusterProcessPacket(clusterLink *link) { memcpy(myself->ip, ip, NET_IP_STR_LEN); serverLog(LL_NOTICE, "IP address for this node updated to %s", myself->ip); clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG); - updateHumanNodenameToAddress(myself); + /* Update human nodename on IP change if the nodename is not explicitly set by user */ + if (server.cluster_announce_human_nodename == NULL || server.cluster_announce_human_nodename[0] == '\0') + updateHumanNodenameToAddress(myself); } } @@ -4500,7 +4502,7 @@ static void clusterBuildMessageHdr(clusterMsg *hdr, int type, size_t msglen) { /* If this node is a primary, we send its slots bitmap and configEpoch. * If this node is a replica we send the primary's information instead (the * node is flagged as replica so the receiver knows that it is NOT really - * in charge for this slots. */ + * in charge of these slots. */ primary = (nodeIsReplica(myself) && myself->replicaof) ? myself->replicaof : myself; hdr->ver = htons(CLUSTER_PROTO_VER); @@ -5408,7 +5410,7 @@ void clusterHandleReplicaFailover(void) { server.cluster->failover_auth_time - now, server.cluster->failover_auth_rank, server.cluster->failover_failed_primary_rank, replicationGetReplicaOffset()); /* Now that we have a scheduled election, broadcast our offset - * to all the other replicas so that they'll updated their offsets + * to all the other replicas so that they'll update their offsets * if our offset is better. */ clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_REPLICAS); From 503e5607adb4fca8479bac7ed1f8265bed81e06b Mon Sep 17 00:00:00 2001 From: Zhijun Date: Wed, 29 Oct 2025 14:17:28 +0800 Subject: [PATCH 6/6] I messed up the order Signed-off-by: Zhijun --- src/cluster_legacy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 0c2563e789..bcefcef680 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1428,9 +1428,9 @@ void clusterInit(void) { * will know this name eventually. */ if (server.cluster_announce_human_nodename == NULL || server.cluster_announce_human_nodename[0] == '\0') - clusterUpdateMyselfHumanNodename(); - else updateHumanNodenameToAddress(myself); + else + clusterUpdateMyselfHumanNodename(); resetClusterStats(); }