Skip to content

Commit 2b620fa

Browse files
author
Ken Gaillot
authored
Merge pull request #2043 from gao-yan/priority-fencing-delay-1.1
[1.1] Feature: priority-fencing-delay
2 parents 63d2d79 + 7367658 commit 2b620fa

25 files changed

+1799
-64
lines changed

crmd/te_actions.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ te_fence_node(crm_graph_t * graph, crm_action_t * action)
164164
const char *uuid = NULL;
165165
const char *target = NULL;
166166
const char *type = NULL;
167+
const char *priority_delay = NULL;
167168
gboolean invalid_action = FALSE;
168169
enum stonith_call_options options = st_opt_none;
169170

@@ -182,9 +183,11 @@ te_fence_node(crm_graph_t * graph, crm_action_t * action)
182183
return FALSE;
183184
}
184185

186+
priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
187+
185188
crm_notice("Requesting fencing (%s) of node %s "
186-
CRM_XS " action=%s timeout=%d",
187-
type, target, id, transition_graph->stonith_timeout);
189+
CRM_XS " action=%s timeout=%u priority_delay=%s",
190+
type, target, id, transition_graph->stonith_timeout, priority_delay);
188191

189192
/* Passing NULL means block until we can connect... */
190193
te_connect_stonith(NULL);
@@ -193,8 +196,9 @@ te_fence_node(crm_graph_t * graph, crm_action_t * action)
193196
options |= st_opt_allow_suicide;
194197
}
195198

196-
rc = stonith_api->cmds->fence(stonith_api, options, target, type,
197-
transition_graph->stonith_timeout / 1000, 0);
199+
rc = stonith_api->cmds->fence_with_delay(stonith_api, options, target, type,
200+
(int) (transition_graph->stonith_timeout / 1000),
201+
0, crm_atoi(priority_delay, "0"));
198202

199203
stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
200204
st_opt_timeout_updates,

doc/Pacemaker_Explained/en-US/Ch-Options.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,22 @@ indexterm:[Cluster,Option,concurrent-fencing]
241241
Is the cluster allowed to initiate multiple fence actions concurrently?
242242
'(since 1.1.15)'
243243

244+
| priority-fencing-delay | 0 |
245+
indexterm:[priority-fencing-delay,Cluster Option]
246+
indexterm:[Cluster,Option,priority-fencing-delay]
247+
Apply the specified delay to fencing actions that target the lost
248+
nodes with the highest total resource priority when our cluster
249+
partition does not have a majority of the nodes, so that the more
250+
significant nodes are more likely to win any fencing race. This is
251+
especially meaningful in a split-brain of a 2-node cluster. A
252+
promoted resource instance counts as its base priority + 1 in the
253+
calculation if the base priority is not 0. Any static/random
254+
delays introduced by `pcmk_delay_base/max` configured for the
255+
corresponding fencing resources are added to this delay. This
256+
delay should be significantly greater than (to be safe, twice)
257+
the maximum `pcmk_delay_base/max`. By default, priority
258+
fencing delay is disabled. '(since 1.1.23)'
259+
244260
| cluster-delay | 60s |
245261
indexterm:[cluster-delay,Cluster Option]
246262
indexterm:[Cluster,Option,cluster-delay]

fencing/admin.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ static struct crm_option long_options[] = {
176176
"Operation timeout in seconds (default 120;\n"
177177
"\t\t\tused with most commands)."
178178
},
179+
{ "delay", required_argument, NULL, 'y',
180+
"Apply a fencing delay in seconds. Any static/random delays from\n"
181+
"\t\t\tpcmk_delay_base/max will be added, otherwise all\n"
182+
"\t\t\tdisabled with the value -1\n"
183+
"\t\t\t(default 0; with --fence, --reboot, --unfence)."
184+
},
179185
{ "as-node-id", no_argument, NULL, 'n',
180186
"(Advanced) The supplied node is the corosync node ID\n"
181187
"\t\t\t(with --last)."
@@ -201,6 +207,7 @@ struct {
201207
char *name;
202208
int timeout;
203209
int tolerance;
210+
int delay;
204211
int rc;
205212
} async_fence_data;
206213

@@ -265,11 +272,13 @@ async_fence_helper(gpointer user_data)
265272

266273
st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback);
267274

268-
call_id = st->cmds->fence(st,
269-
st_opt_allow_suicide,
270-
async_fence_data.target,
271-
async_fence_data.action,
272-
async_fence_data.timeout, async_fence_data.tolerance);
275+
call_id = st->cmds->fence_with_delay(st,
276+
st_opt_allow_suicide,
277+
async_fence_data.target,
278+
async_fence_data.action,
279+
async_fence_data.timeout,
280+
async_fence_data.tolerance,
281+
async_fence_data.delay);
273282

274283
if (call_id < 0) {
275284
g_main_loop_quit(mainloop);
@@ -285,7 +294,8 @@ async_fence_helper(gpointer user_data)
285294
}
286295

287296
static int
288-
mainloop_fencing(stonith_t * st, const char *target, const char *action, int timeout, int tolerance)
297+
mainloop_fencing(stonith_t * st, const char *target, const char *action,
298+
int timeout, int tolerance, int delay)
289299
{
290300
crm_trigger_t *trig;
291301

@@ -294,6 +304,7 @@ mainloop_fencing(stonith_t * st, const char *target, const char *action, int tim
294304
async_fence_data.action = action;
295305
async_fence_data.timeout = timeout;
296306
async_fence_data.tolerance = tolerance;
307+
async_fence_data.delay = delay;
297308
async_fence_data.rc = -1;
298309

299310
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
@@ -492,6 +503,7 @@ main(int argc, char **argv)
492503
int verbose = 0;
493504
int argerr = 0;
494505
int timeout = 120;
506+
int delay = 0;
495507
int option_index = 0;
496508
int fence_level = 0;
497509
int no_connect = 0;
@@ -574,6 +586,9 @@ main(int argc, char **argv)
574586
case 't':
575587
timeout = crm_atoi(optarg, NULL);
576588
break;
589+
case 'y':
590+
delay = crm_atoi(optarg, NULL);
591+
break;
577592
case 'B':
578593
case 'F':
579594
case 'U':
@@ -760,13 +775,13 @@ main(int argc, char **argv)
760775
rc = st->cmds->confirm(st, st_opts, target);
761776
break;
762777
case 'B':
763-
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance);
778+
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance, delay);
764779
break;
765780
case 'F':
766-
rc = mainloop_fencing(st, target, "off", timeout, tolerance);
781+
rc = mainloop_fencing(st, target, "off", timeout, tolerance, delay);
767782
break;
768783
case 'U':
769-
rc = mainloop_fencing(st, target, "on", timeout, tolerance);
784+
rc = mainloop_fencing(st, target, "on", timeout, tolerance, delay);
770785
break;
771786
case 'h':
772787
show_last_fenced(as_nodeid, target);

fencing/commands.c

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ typedef struct async_command_s {
8080
int default_timeout; /* seconds */
8181
int timeout; /* seconds */
8282

83-
int start_delay; /* milliseconds */
83+
int start_delay; /* seconds */
8484
int delay_id;
8585

8686
char *op;
@@ -123,36 +123,36 @@ static int
123123
get_action_delay_max(stonith_device_t * device, const char * action)
124124
{
125125
const char *value = NULL;
126-
int delay_max_ms = 0;
126+
int delay_max = 0;
127127

128128
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
129129
return 0;
130130
}
131131

132132
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
133133
if (value) {
134-
delay_max_ms = crm_get_msec(value);
134+
delay_max = crm_get_msec(value) / 1000;
135135
}
136136

137-
return delay_max_ms;
137+
return delay_max;
138138
}
139139

140140
static int
141141
get_action_delay_base(stonith_device_t * device, const char * action)
142142
{
143143
const char *value = NULL;
144-
int delay_base_ms = 0;
144+
int delay_base = 0;
145145

146146
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
147147
return 0;
148148
}
149149

150150
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
151151
if (value) {
152-
delay_base_ms = crm_get_msec(value);
152+
delay_base = crm_get_msec(value) / 1000;
153153
}
154154

155-
return delay_base_ms;
155+
return delay_base;
156156
}
157157

158158
/*!
@@ -243,6 +243,8 @@ create_async_command(xmlNode * msg)
243243
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
244244
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
245245
cmd->timeout = cmd->default_timeout;
246+
// Value -1 means disable any static/random fencing delays
247+
crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
246248

247249
cmd->origin = crm_element_value_copy(msg, F_ORIG);
248250
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
@@ -349,7 +351,7 @@ stonith_device_execute(stonith_device_t * device)
349351

350352
if (pending_op && pending_op->delay_id) {
351353
crm_trace
352-
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms",
354+
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds",
353355
pending_op->action, pending_op->victim ? " targeting " : "",
354356
pending_op->victim ? pending_op->victim : "",
355357
device->id, pending_op->start_delay);
@@ -464,6 +466,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
464466
{
465467
int delay_max = 0;
466468
int delay_base = 0;
469+
int requested_delay = cmd->start_delay;
467470

468471
CRM_CHECK(cmd != NULL, return);
469472
CRM_CHECK(device != NULL, return);
@@ -496,30 +499,38 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
496499
device->pending_ops = g_list_append(device->pending_ops, cmd);
497500
mainloop_set_trigger(device->work);
498501

502+
// Value -1 means disable any static/random fencing delays
503+
if (requested_delay < 0) {
504+
return;
505+
}
506+
499507
delay_max = get_action_delay_max(device, cmd->action);
500508
delay_base = get_action_delay_base(device, cmd->action);
501509
if (delay_max == 0) {
502510
delay_max = delay_base;
503511
}
504512
if (delay_max < delay_base) {
505-
crm_warn("Base-delay (%dms) is larger than max-delay (%dms) "
513+
crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
506514
"for %s on %s - limiting to max-delay",
507515
delay_base, delay_max, cmd->action, device->id);
508516
delay_base = delay_max;
509517
}
510518
if (delay_max > 0) {
511519
// coverity[dont_call] We're not using rand() for security
512-
cmd->start_delay =
520+
cmd->start_delay +=
513521
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
514522
+ delay_base;
515-
crm_notice("Delaying '%s' action%s%s on %s for %dms (timeout=%ds, base=%dms, "
516-
"max=%dms)",
517-
cmd->action,
518-
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
519-
device->id, cmd->start_delay, cmd->timeout,
520-
delay_base, delay_max);
523+
}
524+
525+
if (cmd->start_delay > 0) {
526+
crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, "
527+
"requested_delay=%ds, base=%ds, max=%ds)",
528+
cmd->action,
529+
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
530+
device->id, cmd->start_delay, cmd->timeout,
531+
requested_delay, delay_base, delay_max);
521532
cmd->delay_id =
522-
g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
533+
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
523534
}
524535
}
525536

fencing/internal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ typedef struct remote_fencing_op_s {
9898
* values associated with the devices this fencing operation may call */
9999
gint total_timeout;
100100

101+
/*! Requested fencing delay.
102+
* Value -1 means disable any static/random fencing delays. */
103+
int delay;
104+
101105
/*! Delegate is the node being asked to perform a fencing action
102106
* on behalf of the node that owns the remote operation. Some operations
103107
* will involve multiple delegates. This value represents the final delegate

fencing/regression.py.in

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,36 @@ class Tests(object):
952952
test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
953953
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)")
954954

955+
# make sure requested fencing delay is applied only for the first device in the first level
956+
# make sure static delay from pcmk_delay_base is added
957+
for test_type in test_types:
958+
if test_type["use_cpg"] == 0:
959+
continue
960+
961+
test = self.new_test("%s_topology_delay" % test_type["prefix"],
962+
"Verify requested fencing delay is applied only for the first device in the first level and pcmk_delay_base is added.",
963+
test_type["use_cpg"])
964+
test.add_cmd("stonith_admin",
965+
"-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
966+
test.add_cmd("stonith_admin",
967+
"-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
968+
test.add_cmd("stonith_admin",
969+
"-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
970+
test.add_cmd("stonith_admin",
971+
"-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
972+
973+
test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
974+
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
975+
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
976+
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true3")
977+
978+
test.add_cmd("stonith_admin", "-F node3 --delay 1")
979+
980+
test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on true1 for 2s (timeout=120s, requested_delay=1s, base=1s, max=1s)")
981+
test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on false1 for 1s (timeout=120s, requested_delay=0s, base=1s, max=1s)")
982+
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true2")
983+
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true3")
984+
955985
def build_nodeid_tests(self):
956986
""" Register tests that use a corosync node id """
957987

fencing/remote.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,11 @@ stonith_topology_next(remote_fencing_op_t * op)
835835
op->client_name, op->originator, op->id);
836836
set_op_device_list(op, tp->levels[op->level]);
837837

838+
// The requested delay has been applied for the first fencing level
839+
if (op->level > 1 && op->delay > 0) {
840+
op->delay = 0;
841+
}
842+
838843
if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
839844
/* A reboot has been requested for a topology level with multiple
840845
* devices. Instead of rebooting the devices sequentially, we will
@@ -992,6 +997,8 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
992997
op = calloc(1, sizeof(remote_fencing_op_t));
993998

994999
crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1000+
// Value -1 means disable any static/random fencing delays
1001+
crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
9951002

9961003
if (peer && dev) {
9971004
op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
@@ -1440,6 +1447,12 @@ advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
14401447
/* Necessary devices remain, so execute the next one */
14411448
crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
14421449
op->target, op->originator, op->client_name, rc);
1450+
1451+
// The requested delay has been applied for the first device
1452+
if (op->delay > 0) {
1453+
op->delay = 0;
1454+
}
1455+
14431456
call_remote_stonith(op, NULL);
14441457
} else {
14451458
/* We're done with all devices and phases, so finalize operation */
@@ -1494,6 +1507,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
14941507
crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
14951508
crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
14961509
crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
1510+
crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
14971511

14981512
if (device) {
14991513
timeout_one = TIMEOUT_MULTIPLY_FACTOR *

include/crm/fencing/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ xmlNode *create_device_registration_xml(const char *id,
5757
/*! Timeout period per a device execution */
5858
# define F_STONITH_TIMEOUT "st_timeout"
5959
# define F_STONITH_TOLERANCE "st_tolerance"
60+
# define F_STONITH_DELAY "st_delay"
6061
/*! Action specific timeout period returned in query of fencing devices. */
6162
# define F_STONITH_ACTION_TIMEOUT "st_action_timeout"
6263
/*! Host in query result is not allowed to run this action */

include/crm/msg_xml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@
378378
# define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation"
379379
# define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval"
380380
# define XML_CONFIG_ATTR_FENCE_REACTION "fence-reaction"
381+
# define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY "priority-fencing-delay"
381382

382383
# define XML_ALERT_ATTR_PATH "path"
383384
# define XML_ALERT_ATTR_TIMEOUT "timeout"

include/crm/pengine/internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ typedef struct op_digest_cache_s {
329329
op_digest_cache_t *rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
330330
pe_working_set_t * data_set);
331331

332-
action_t *pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set);
332+
pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set);
333333
void trigger_unfencing(
334334
resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set);
335335

@@ -346,7 +346,7 @@ gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj
346346

347347
void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options,
348348
void * print_data, gboolean print_all);
349-
void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason);
349+
void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay);
350350

351351
node_t *pe_create_node(const char *id, const char *uname, const char *type,
352352
const char *score, pe_working_set_t * data_set);

0 commit comments

Comments
 (0)