-
Notifications
You must be signed in to change notification settings - Fork 941
Bound the number of nodes in gossip section #2746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: unstable
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4600,13 +4600,26 @@ void clusterSendPing(clusterLink *link, int type) { | |
| * Since we have non-voting replicas that lower the probability of an entry | ||
| * to feature our node, we set the number of entries per packet as | ||
| * 10% of the total nodes we have. */ | ||
| wanted = floor(dictSize(server.cluster->nodes) / 10); | ||
| if (wanted < 3) wanted = 3; | ||
| if (wanted > freshnodes) wanted = freshnodes; | ||
| int overall = server.cluster_ping_message_gossip_max_count; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we have it as a percentage? Also, we are naming it to be … [text truncated] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I was going to suggest the same. The default is a percentage, so it seems appropriate to configure it as a percentage. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I like this. Will be easier for folks to deal with scale in/out situations. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about supporting both options? |
||
| if (!overall) { | ||
| overall = floor(dictSize(server.cluster->nodes) / 10); | ||
| if (overall < 3) overall = 3; | ||
| } | ||
|
|
||
| /* Include all the nodes in PFAIL state, so that failure reports are | ||
| * faster to propagate to go from PFAIL to FAIL state. */ | ||
| /* Prioritize pfail nodes over other nodes. | ||
| * Healthy nodes can communicate through direct ping/pong if required and failed node | ||
| * information would be broadcasted. */ | ||
| int pfail_wanted = server.cluster->stats_pfail_nodes; | ||
| if (pfail_wanted >= overall) { | ||
| pfail_wanted = overall - 1; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we set the … [text truncated] Why are we reserving one spot in `overall` for `wanted`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I suggested that. Will update it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we foresee any regression if we don't gossip healthy nodes at all? I am wondering about scenarios where PFAIL nodes are never actually marked as FAIL or healthy. |
||
| wanted = 1; | ||
| } else { | ||
| wanted = overall - pfail_wanted; | ||
| } | ||
|
|
||
| if (wanted > freshnodes) { | ||
| wanted = freshnodes; | ||
| } | ||
|
|
||
| /* Compute the maximum estlen to allocate our buffer. We'll fix the estlen | ||
| * later according to the number of gossip sections we really were able | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3350,13 +3350,16 @@ standardConfig static_configs[] = { | |
| createIntConfig("rdma-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.rdma_ctx_config.port, 0, INTEGER_CONFIG, NULL, updateRdmaPort), | ||
| createIntConfig("rdma-rx-size", NULL, IMMUTABLE_CONFIG, 64 * 1024, 16 * 1024 * 1024, server.rdma_ctx_config.rx_size, 1024 * 1024, INTEGER_CONFIG, NULL, NULL), | ||
| createIntConfig("rdma-completion-vector", NULL, IMMUTABLE_CONFIG, -1, 1024, server.rdma_ctx_config.completion_vector, -1, INTEGER_CONFIG, NULL, NULL), | ||
| createIntConfig("cluster-ping-message-gossip-max-count", NULL, MODIFIABLE_CONFIG, 0, 2000, server.cluster_ping_message_gossip_max_count, 0, INTEGER_CONFIG, NULL, NULL), | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can the max be a function of dictSize(server.cluster->nodes)? I mean, it would be good to validate that we don't oversend the number of nodes in gossip. |
||
|
|
||
|
|
||
| /* Unsigned int configs */ | ||
| createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, server.maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), | ||
| createUIntConfig("unixsocketperm", NULL, IMMUTABLE_CONFIG, 0, 0777, server.unix_ctx_config.perm, 0, OCTAL_CONFIG, NULL, NULL), | ||
| createUIntConfig("socket-mark-id", NULL, IMMUTABLE_CONFIG, 0, UINT_MAX, server.socket_mark_id, 0, INTEGER_CONFIG, NULL, NULL), | ||
| createUIntConfig("max-new-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_conns_per_cycle, 10, INTEGER_CONFIG, NULL, NULL), | ||
| createUIntConfig("max-new-tls-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_tls_conns_per_cycle, 1, INTEGER_CONFIG, NULL, NULL), | ||
|
|
||
| #ifdef LOG_REQ_RES | ||
| createUIntConfig("client-default-resp", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, 2, 3, server.client_default_resp, 2, INTEGER_CONFIG, NULL, NULL), | ||
| #endif | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to update the comment?