diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in
index 2d9999ca01a..239319cb169 100644
--- a/cts/cts-fencing.in
+++ b/cts/cts-fencing.in
@@ -1126,6 +1126,35 @@ class Tests(object):
         test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
         test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)")
 
+        # make sure enforced fencing delay is applied only for the first device in the first level
+        for test_type in test_types:
+            if test_type["use_cpg"] == 0:
+                continue
+
+            test = self.new_test("%s_topology_delay" % test_type["prefix"],
+                                 "Verify enforced fencing delay is applied only for the first device in the first level.",
+                                 test_type["use_cpg"])
+            test.add_cmd("stonith_admin",
+                         "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
+            test.add_cmd("stonith_admin",
+                         "--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
+            test.add_cmd("stonith_admin",
+                         "--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
+            test.add_cmd("stonith_admin",
+                         "--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
+
+            test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1")
+            test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1")
+            test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true2")
+            test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 2 -v true3")
+
+            test.add_cmd("stonith_admin", "--output-as=xml -F node3 --delay 1")
+
+            test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on true1 for enforced 1s")
+            test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on false1")
+            test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true2")
+            test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true3")
+
     def build_nodeid_tests(self):
         """ Register tests that use a corosync node id """
diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in
index f2957bad660..99436f1bb8e 100644
--- a/cts/cts-scheduler.in
+++ b/cts/cts-scheduler.in
@@ -625,6 +625,7 @@ TESTS = [
     [ "order-first-probes", "cl#5301 - respect order constraints when relevant resources are being probed" ],
     [ "concurrent-fencing", "Allow performing fencing operations in parallel" ],
+    [ "priority-fencing-delay", "Delay fencing targeting the more significant node" ],
 ],
 [
     [ "systemhealth1", "System Health () #1" ],
diff --git a/cts/scheduler/priority-fencing-delay.dot b/cts/scheduler/priority-fencing-delay.dot
new file mode 100644
index 00000000000..62ba699eb6d
--- /dev/null
+++ b/cts/scheduler/priority-fencing-delay.dot
@@ -0,0 +1,109 @@
+ digraph "g" {
+"R-lxc-01_kiff-01_monitor_10000 kiff-02" [ style=bold color="green" fontcolor="black"]
+"R-lxc-01_kiff-01_start_0 kiff-02" -> "R-lxc-01_kiff-01_monitor_10000 kiff-02" [ style = bold]
+"R-lxc-01_kiff-01_start_0 kiff-02" -> "lxc-01_kiff-01_start_0 kiff-02" [ style = bold]
+"R-lxc-01_kiff-01_start_0 kiff-02" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold]
+"R-lxc-01_kiff-01_start_0 kiff-02" [ style=bold color="green" fontcolor="black"]
+"R-lxc-01_kiff-01_stop_0 kiff-01" -> "R-lxc-01_kiff-01_start_0 kiff-02" [ style = bold]
+"R-lxc-01_kiff-01_stop_0 kiff-01" -> "shared0-clone_stop_0" [ style = bold]
+"R-lxc-01_kiff-01_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"]
+"R-lxc-02_kiff-01_monitor_10000 kiff-02" [ style=bold color="green"
fontcolor="black"] +"R-lxc-02_kiff-01_start_0 kiff-02" -> "R-lxc-02_kiff-01_monitor_10000 kiff-02" [ style = bold] +"R-lxc-02_kiff-01_start_0 kiff-02" -> "lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"R-lxc-02_kiff-01_start_0 kiff-02" [ style=bold color="green" fontcolor="black"] +"R-lxc-02_kiff-01_stop_0 kiff-01" -> "R-lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"R-lxc-02_kiff-01_stop_0 kiff-01" -> "shared0-clone_stop_0" [ style = bold] +"R-lxc-02_kiff-01_stop_0 kiff-01" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"R-lxc-02_kiff-01_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"clvmd-clone_stop_0" -> "clvmd-clone_stopped_0" [ style = bold] +"clvmd-clone_stop_0" -> "clvmd_stop_0 kiff-01" [ style = bold] +"clvmd-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"clvmd-clone_stopped_0" -> "dlm-clone_stop_0" [ style = bold] +"clvmd-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"clvmd_monitor_0 lxc-01_kiff-02" -> "clvmd-clone_stopped_0" [ style = bold] +"clvmd_monitor_0 lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] +"clvmd_monitor_0 lxc-02_kiff-02" -> "clvmd-clone_stopped_0" [ style = bold] +"clvmd_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] +"clvmd_stop_0 kiff-01" -> "clvmd-clone_stopped_0" [ style = bold] +"clvmd_stop_0 kiff-01" -> "dlm_stop_0 kiff-01" [ style = bold] +"clvmd_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"dlm-clone_stop_0" -> "dlm-clone_stopped_0" [ style = bold] +"dlm-clone_stop_0" -> "dlm_stop_0 kiff-01" [ style = bold] +"dlm-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"dlm-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"dlm_monitor_0 lxc-01_kiff-02" -> "dlm-clone_stopped_0" [ style = bold] +"dlm_monitor_0 lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] +"dlm_monitor_0 lxc-02_kiff-02" -> "dlm-clone_stopped_0" [ style = bold] +"dlm_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] +"dlm_stop_0 kiff-01" -> "dlm-clone_stopped_0" [ style = bold] +"dlm_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"fence-kiff-02_monitor_60000 kiff-02" [ style=bold color="green" fontcolor="black"] +"fence-kiff-02_start_0 kiff-02" -> "fence-kiff-02_monitor_60000 kiff-02" [ style = bold] +"fence-kiff-02_start_0 kiff-02" [ style=bold color="green" fontcolor="black"] +"fence-kiff-02_stop_0 kiff-01" -> "fence-kiff-02_start_0 kiff-02" [ style = bold] +"fence-kiff-02_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"lxc-01_kiff-01_monitor_30000 kiff-02" [ style=bold color="green" fontcolor="black"] +"lxc-01_kiff-01_start_0 kiff-02" -> "lxc-01_kiff-01_monitor_30000 kiff-02" [ style = bold] +"lxc-01_kiff-01_start_0 kiff-02" -> "vm-fs_monitor_20000 lxc-01_kiff-01" [ style = bold] +"lxc-01_kiff-01_start_0 kiff-02" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"lxc-01_kiff-01_start_0 kiff-02" [ style=bold color="green" fontcolor="black"] +"lxc-01_kiff-01_stop_0 kiff-01" -> "R-lxc-01_kiff-01_stop_0 kiff-01" [ style = bold] +"lxc-01_kiff-01_stop_0 kiff-01" -> "lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"lxc-01_kiff-01_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"lxc-02_kiff-01_monitor_30000 kiff-02" [ style=bold color="green" fontcolor="black"] +"lxc-02_kiff-01_start_0 kiff-02" -> "lxc-02_kiff-01_monitor_30000 kiff-02" [ style = bold] +"lxc-02_kiff-01_start_0 kiff-02" [ style=bold color="green" fontcolor="black"] +"lxc-02_kiff-01_stop_0 kiff-01" -> 
"R-lxc-02_kiff-01_stop_0 kiff-01" [ style = bold] +"lxc-02_kiff-01_stop_0 kiff-01" -> "lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"lxc-02_kiff-01_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"shared0-clone_stop_0" -> "shared0-clone_stopped_0" [ style = bold] +"shared0-clone_stop_0" -> "shared0_stop_0 kiff-01" [ style = bold] +"shared0-clone_stop_0" [ style=bold color="green" fontcolor="orange"] +"shared0-clone_stopped_0" -> "clvmd-clone_stop_0" [ style = bold] +"shared0-clone_stopped_0" [ style=bold color="green" fontcolor="orange"] +"shared0_monitor_0 lxc-01_kiff-02" -> "shared0-clone_stopped_0" [ style = bold] +"shared0_monitor_0 lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] +"shared0_monitor_0 lxc-02_kiff-02" -> "shared0-clone_stopped_0" [ style = bold] +"shared0_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] +"shared0_stop_0 kiff-01" -> "clvmd_stop_0 kiff-01" [ style = bold] +"shared0_stop_0 kiff-01" -> "shared0-clone_stopped_0" [ style = bold] +"shared0_stop_0 kiff-01" [ style=bold color="green" fontcolor="orange"] +"stonith 'reboot' kiff-01" -> "R-lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' kiff-01" -> "R-lxc-01_kiff-01_stop_0 kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "R-lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' kiff-01" -> "R-lxc-02_kiff-01_stop_0 kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "clvmd-clone_stop_0" [ style = bold] +"stonith 'reboot' kiff-01" -> "clvmd_stop_0 kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "dlm-clone_stop_0" [ style = bold] +"stonith 'reboot' kiff-01" -> "dlm_stop_0 kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "shared0-clone_stop_0" [ style = bold] +"stonith 'reboot' kiff-01" -> "shared0_stop_0 kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "stonith 'reboot' lxc-01_kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "stonith 'reboot' lxc-02_kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"stonith 'reboot' kiff-01" [ style=bold color="green" fontcolor="black"] +"stonith 'reboot' lxc-01_kiff-01" -> "R-lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" -> "R-lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" -> "fence-kiff-02_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" -> "lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" -> "lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" -> "vm-fs_stop_0 lxc-01_kiff-01" [ style = bold] +"stonith 'reboot' lxc-01_kiff-01" [ style=bold color="green" fontcolor="orange"] +"stonith 'reboot' lxc-02_kiff-01" -> "R-lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-02_kiff-01" -> "R-lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-02_kiff-01" -> "fence-kiff-02_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-02_kiff-01" -> "lxc-01_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-02_kiff-01" -> "lxc-02_kiff-01_start_0 kiff-02" [ style = bold] +"stonith 'reboot' lxc-02_kiff-01" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"stonith 'reboot' lxc-02_kiff-01" [ style=bold color="green" fontcolor="orange"] +"vm-fs_monitor_0 lxc-01_kiff-02" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"vm-fs_monitor_0 
lxc-01_kiff-02" [ style=bold color="green" fontcolor="black"] +"vm-fs_monitor_0 lxc-02_kiff-02" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"vm-fs_monitor_0 lxc-02_kiff-02" [ style=bold color="green" fontcolor="black"] +"vm-fs_monitor_20000 lxc-01_kiff-01" [ style=bold color="green" fontcolor="black"] +"vm-fs_start_0 lxc-01_kiff-01" -> "vm-fs_monitor_20000 lxc-01_kiff-01" [ style = bold] +"vm-fs_start_0 lxc-01_kiff-01" [ style=bold color="green" fontcolor="black"] +"vm-fs_stop_0 lxc-01_kiff-01" -> "vm-fs_start_0 lxc-01_kiff-01" [ style = bold] +"vm-fs_stop_0 lxc-01_kiff-01" [ style=bold color="green" fontcolor="orange"] +} diff --git a/cts/scheduler/priority-fencing-delay.exp b/cts/scheduler/priority-fencing-delay.exp new file mode 100644 index 00000000000..c6315a1280a --- /dev/null +++ b/cts/scheduler/priority-fencing-delay.exp @@ -0,0 +1,570 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cts/scheduler/priority-fencing-delay.scores b/cts/scheduler/priority-fencing-delay.scores new file mode 100644 index 00000000000..b96175efbf1 --- /dev/null +++ b/cts/scheduler/priority-fencing-delay.scores @@ -0,0 +1,301 @@ +Allocation scores: +pcmk__clone_allocate: clvmd-clone allocation score on kiff-01: 0 +pcmk__clone_allocate: clvmd-clone allocation score on kiff-02: 0 +pcmk__clone_allocate: clvmd-clone allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd-clone allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd-clone allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd-clone allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: clvmd:0 allocation score on kiff-01: 1 +pcmk__clone_allocate: clvmd:0 allocation score on kiff-02: 0 +pcmk__clone_allocate: clvmd:0 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd:0 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd:0 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd:0 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: clvmd:1 allocation score on kiff-01: 0 +pcmk__clone_allocate: clvmd:1 allocation score on kiff-02: 1 +pcmk__clone_allocate: clvmd:1 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd:1 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd:1 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd:1 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: clvmd:2 allocation score on kiff-01: 0 
+pcmk__clone_allocate: clvmd:2 allocation score on kiff-02: 0 +pcmk__clone_allocate: clvmd:2 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd:2 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd:2 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd:2 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: clvmd:3 allocation score on kiff-01: 0 +pcmk__clone_allocate: clvmd:3 allocation score on kiff-02: 0 +pcmk__clone_allocate: clvmd:3 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd:3 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd:3 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd:3 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: clvmd:4 allocation score on kiff-01: 0 +pcmk__clone_allocate: clvmd:4 allocation score on kiff-02: 0 +pcmk__clone_allocate: clvmd:4 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd:4 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd:4 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd:4 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: clvmd:5 allocation score on kiff-01: 0 +pcmk__clone_allocate: clvmd:5 allocation score on kiff-02: 0 +pcmk__clone_allocate: clvmd:5 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: clvmd:5 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: clvmd:5 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: clvmd:5 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: dlm-clone allocation score on kiff-01: 0 +pcmk__clone_allocate: dlm-clone allocation score on kiff-02: 0 +pcmk__clone_allocate: dlm-clone allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm-clone allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm-clone allocation score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm-clone allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:0 allocation score on kiff-01: 1 +pcmk__clone_allocate: dlm:0 allocation score on kiff-02: 0 +pcmk__clone_allocate: dlm:0 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:0 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:0 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:0 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:1 allocation score on kiff-01: 0 +pcmk__clone_allocate: dlm:1 allocation score on kiff-02: 1 +pcmk__clone_allocate: dlm:1 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:1 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:1 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:1 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:2 allocation score on kiff-01: 0 +pcmk__clone_allocate: dlm:2 allocation score on kiff-02: 0 +pcmk__clone_allocate: dlm:2 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:2 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:2 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:2 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:3 allocation score on kiff-01: 0 +pcmk__clone_allocate: dlm:3 allocation score on kiff-02: 0 +pcmk__clone_allocate: dlm:3 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:3 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:3 allocation 
score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:3 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:4 allocation score on kiff-01: 0 +pcmk__clone_allocate: dlm:4 allocation score on kiff-02: 0 +pcmk__clone_allocate: dlm:4 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:4 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:4 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:4 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:5 allocation score on kiff-01: 0 +pcmk__clone_allocate: dlm:5 allocation score on kiff-02: 0 +pcmk__clone_allocate: dlm:5 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:5 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__clone_allocate: dlm:5 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__clone_allocate: dlm:5 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__clone_allocate: shared0-clone allocation score on kiff-01: 0 +pcmk__clone_allocate: shared0-clone allocation score on kiff-02: 0 +pcmk__clone_allocate: shared0-clone allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0-clone allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0-clone allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0-clone allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: shared0:0 allocation score on kiff-01: 1 +pcmk__clone_allocate: shared0:0 allocation score on kiff-02: 0 +pcmk__clone_allocate: shared0:0 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0:0 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0:0 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0:0 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: shared0:1 allocation score on kiff-01: 0 +pcmk__clone_allocate: shared0:1 allocation score on kiff-02: 1 +pcmk__clone_allocate: shared0:1 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0:1 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0:1 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0:1 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: shared0:2 allocation score on kiff-01: 0 +pcmk__clone_allocate: shared0:2 allocation score on kiff-02: 0 +pcmk__clone_allocate: shared0:2 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0:2 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0:2 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0:2 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: shared0:3 allocation score on kiff-01: 0 +pcmk__clone_allocate: shared0:3 allocation score on kiff-02: 0 +pcmk__clone_allocate: shared0:3 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0:3 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0:3 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0:3 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: shared0:4 allocation score on kiff-01: 0 +pcmk__clone_allocate: shared0:4 allocation score on kiff-02: 0 +pcmk__clone_allocate: shared0:4 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0:4 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0:4 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0:4 allocation score on lxc-02_kiff-02: 0 +pcmk__clone_allocate: shared0:5 allocation score on kiff-01: 0 +pcmk__clone_allocate: shared0:5 allocation 
score on kiff-02: 0 +pcmk__clone_allocate: shared0:5 allocation score on lxc-01_kiff-01: 0 +pcmk__clone_allocate: shared0:5 allocation score on lxc-01_kiff-02: 0 +pcmk__clone_allocate: shared0:5 allocation score on lxc-02_kiff-01: 0 +pcmk__clone_allocate: shared0:5 allocation score on lxc-02_kiff-02: 0 +pcmk__native_allocate: R-lxc-01_kiff-01 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-01 allocation score on kiff-02: 0 +pcmk__native_allocate: R-lxc-01_kiff-01 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-01 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-01 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-01 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-02 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-02 allocation score on kiff-02: 100 +pcmk__native_allocate: R-lxc-01_kiff-02 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-02 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-02 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-01_kiff-02 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-01 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-01 allocation score on kiff-02: 0 +pcmk__native_allocate: R-lxc-02_kiff-01 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-01 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-01 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-01 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-02 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-02 allocation score on kiff-02: 100 +pcmk__native_allocate: R-lxc-02_kiff-02 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-02 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-02 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: R-lxc-02_kiff-02 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:0 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: clvmd:0 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: clvmd:0 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:0 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:0 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:0 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:1 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: clvmd:1 allocation score on kiff-02: 1 +pcmk__native_allocate: clvmd:1 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:1 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:1 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:1 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:2 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: clvmd:2 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: clvmd:2 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:2 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:2 allocation score 
on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:2 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:3 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: clvmd:3 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: clvmd:3 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:3 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:3 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:3 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:4 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: clvmd:4 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: clvmd:4 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:4 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:4 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:4 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:5 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: clvmd:5 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: clvmd:5 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:5 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: clvmd:5 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: clvmd:5 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: dlm:0 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: dlm:0 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: dlm:0 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: dlm:0 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: dlm:0 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: dlm:0 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: dlm:1 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: dlm:1 allocation score on kiff-02: 1 +pcmk__native_allocate: dlm:1 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: dlm:1 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: dlm:1 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: dlm:1 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: dlm:2 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: dlm:2 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: dlm:2 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: dlm:2 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: dlm:2 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: dlm:2 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: dlm:3 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: dlm:3 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: dlm:3 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: dlm:3 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: dlm:3 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: dlm:3 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: dlm:4 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: dlm:4 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: dlm:4 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: dlm:4 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: dlm:4 
allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: dlm:4 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: dlm:5 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: dlm:5 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: dlm:5 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: dlm:5 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: dlm:5 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: dlm:5 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: fence-kiff-01 allocation score on kiff-01: 0 +pcmk__native_allocate: fence-kiff-01 allocation score on kiff-02: 0 +pcmk__native_allocate: fence-kiff-01 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: fence-kiff-01 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: fence-kiff-01 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: fence-kiff-01 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: fence-kiff-02 allocation score on kiff-01: 0 +pcmk__native_allocate: fence-kiff-02 allocation score on kiff-02: 0 +pcmk__native_allocate: fence-kiff-02 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: fence-kiff-02 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: fence-kiff-02 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: fence-kiff-02 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: lxc-01_kiff-01 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: lxc-01_kiff-01 allocation score on kiff-02: 0 +pcmk__native_allocate: lxc-01_kiff-01 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: lxc-01_kiff-01 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: lxc-01_kiff-01 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: lxc-01_kiff-01 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: lxc-01_kiff-02 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: lxc-01_kiff-02 allocation score on kiff-02: 0 +pcmk__native_allocate: lxc-01_kiff-02 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: lxc-01_kiff-02 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: lxc-01_kiff-02 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: lxc-01_kiff-02 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: lxc-02_kiff-01 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: lxc-02_kiff-01 allocation score on kiff-02: 0 +pcmk__native_allocate: lxc-02_kiff-01 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: lxc-02_kiff-01 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: lxc-02_kiff-01 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: lxc-02_kiff-01 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: lxc-02_kiff-02 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: lxc-02_kiff-02 allocation score on kiff-02: 0 +pcmk__native_allocate: lxc-02_kiff-02 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: lxc-02_kiff-02 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: lxc-02_kiff-02 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: lxc-02_kiff-02 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: shared0:0 allocation score on kiff-01: -INFINITY 
+pcmk__native_allocate: shared0:0 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: shared0:0 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: shared0:0 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: shared0:0 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: shared0:0 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: shared0:1 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: shared0:1 allocation score on kiff-02: 1 +pcmk__native_allocate: shared0:1 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: shared0:1 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: shared0:1 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: shared0:1 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: shared0:2 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: shared0:2 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: shared0:2 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: shared0:2 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: shared0:2 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: shared0:2 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: shared0:3 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: shared0:3 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: shared0:3 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: shared0:3 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: shared0:3 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: shared0:3 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: shared0:4 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: shared0:4 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: shared0:4 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: shared0:4 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: shared0:4 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: shared0:4 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: shared0:5 allocation score on kiff-01: -INFINITY +pcmk__native_allocate: shared0:5 allocation score on kiff-02: -INFINITY +pcmk__native_allocate: shared0:5 allocation score on lxc-01_kiff-01: -INFINITY +pcmk__native_allocate: shared0:5 allocation score on lxc-01_kiff-02: -INFINITY +pcmk__native_allocate: shared0:5 allocation score on lxc-02_kiff-01: -INFINITY +pcmk__native_allocate: shared0:5 allocation score on lxc-02_kiff-02: -INFINITY +pcmk__native_allocate: vm-fs allocation score on kiff-01: 0 +pcmk__native_allocate: vm-fs allocation score on kiff-02: 0 +pcmk__native_allocate: vm-fs allocation score on lxc-01_kiff-01: 0 +pcmk__native_allocate: vm-fs allocation score on lxc-01_kiff-02: 0 +pcmk__native_allocate: vm-fs allocation score on lxc-02_kiff-01: 0 +pcmk__native_allocate: vm-fs allocation score on lxc-02_kiff-02: 0 diff --git a/cts/scheduler/priority-fencing-delay.summary b/cts/scheduler/priority-fencing-delay.summary new file mode 100644 index 00000000000..20a9a4a91b2 --- /dev/null +++ b/cts/scheduler/priority-fencing-delay.summary @@ -0,0 +1,102 @@ + +Current cluster status: +Node kiff-01 (1): UNCLEAN (offline) +Online: [ kiff-02 ] +GuestOnline: [ lxc-01_kiff-02:R-lxc-01_kiff-02 lxc-02_kiff-02:R-lxc-02_kiff-02 ] + + vm-fs (ocf::heartbeat:Filesystem): 
FAILED lxc-01_kiff-01 + R-lxc-01_kiff-02 (ocf::heartbeat:VirtualDomain): Started kiff-02 + fence-kiff-01 (stonith:fence_ipmilan): Started kiff-02 + fence-kiff-02 (stonith:fence_ipmilan): Started kiff-01 (UNCLEAN) + Clone Set: dlm-clone [dlm] + dlm (ocf::pacemaker:controld): Started kiff-01 (UNCLEAN) + Started: [ kiff-02 ] + Stopped: [ lxc-01_kiff-01 lxc-01_kiff-02 lxc-02_kiff-01 lxc-02_kiff-02 ] + Clone Set: clvmd-clone [clvmd] + clvmd (ocf::heartbeat:clvm): Started kiff-01 (UNCLEAN) + Started: [ kiff-02 ] + Stopped: [ lxc-01_kiff-01 lxc-01_kiff-02 lxc-02_kiff-01 lxc-02_kiff-02 ] + Clone Set: shared0-clone [shared0] + shared0 (ocf::heartbeat:Filesystem): Started kiff-01 (UNCLEAN) + Started: [ kiff-02 ] + Stopped: [ lxc-01_kiff-01 lxc-01_kiff-02 lxc-02_kiff-01 lxc-02_kiff-02 ] + R-lxc-01_kiff-01 (ocf::heartbeat:VirtualDomain): FAILED kiff-01 (UNCLEAN) + R-lxc-02_kiff-01 (ocf::heartbeat:VirtualDomain): Started kiff-01 (UNCLEAN) + R-lxc-02_kiff-02 (ocf::heartbeat:VirtualDomain): Started kiff-02 + +Transition Summary: + * Fence (reboot) lxc-02_kiff-01 (resource: R-lxc-02_kiff-01) 'guest is unclean' + * Fence (reboot) lxc-01_kiff-01 (resource: R-lxc-01_kiff-01) 'guest is unclean' + * Fence (reboot) kiff-01 'peer is no longer part of the cluster' + * Recover vm-fs ( lxc-01_kiff-01 ) + * Move fence-kiff-02 ( kiff-01 -> kiff-02 ) + * Stop dlm:0 ( kiff-01 ) due to node availability + * Stop clvmd:0 ( kiff-01 ) due to node availability + * Stop shared0:0 ( kiff-01 ) due to node availability + * Recover R-lxc-01_kiff-01 ( kiff-01 -> kiff-02 ) + * Move R-lxc-02_kiff-01 ( kiff-01 -> kiff-02 ) + * Move lxc-01_kiff-01 ( kiff-01 -> kiff-02 ) + * Move lxc-02_kiff-01 ( kiff-01 -> kiff-02 ) + +Executing cluster transition: + * Resource action: vm-fs monitor on lxc-02_kiff-02 + * Resource action: vm-fs monitor on lxc-01_kiff-02 + * Pseudo action: fence-kiff-02_stop_0 + * Resource action: dlm monitor on lxc-02_kiff-02 + * Resource action: dlm monitor on lxc-01_kiff-02 + * Resource action: clvmd monitor on lxc-02_kiff-02 + * Resource action: clvmd monitor on lxc-01_kiff-02 + * Resource action: shared0 monitor on lxc-02_kiff-02 + * Resource action: shared0 monitor on lxc-01_kiff-02 + * Pseudo action: lxc-01_kiff-01_stop_0 + * Pseudo action: lxc-02_kiff-01_stop_0 + * Fencing kiff-01 (reboot) + * Pseudo action: R-lxc-01_kiff-01_stop_0 + * Pseudo action: R-lxc-02_kiff-01_stop_0 + * Pseudo action: stonith-lxc-02_kiff-01-reboot on lxc-02_kiff-01 + * Pseudo action: stonith-lxc-01_kiff-01-reboot on lxc-01_kiff-01 + * Pseudo action: vm-fs_stop_0 + * Resource action: fence-kiff-02 start on kiff-02 + * Pseudo action: shared0-clone_stop_0 + * Resource action: R-lxc-01_kiff-01 start on kiff-02 + * Resource action: R-lxc-02_kiff-01 start on kiff-02 + * Resource action: lxc-01_kiff-01 start on kiff-02 + * Resource action: lxc-02_kiff-01 start on kiff-02 + * Resource action: vm-fs start on lxc-01_kiff-01 + * Resource action: fence-kiff-02 monitor=60000 on kiff-02 + * Pseudo action: shared0_stop_0 + * Pseudo action: shared0-clone_stopped_0 + * Resource action: R-lxc-01_kiff-01 monitor=10000 on kiff-02 + * Resource action: R-lxc-02_kiff-01 monitor=10000 on kiff-02 + * Resource action: lxc-01_kiff-01 monitor=30000 on kiff-02 + * Resource action: lxc-02_kiff-01 monitor=30000 on kiff-02 + * Resource action: vm-fs monitor=20000 on lxc-01_kiff-01 + * Pseudo action: clvmd-clone_stop_0 + * Pseudo action: clvmd_stop_0 + * Pseudo action: clvmd-clone_stopped_0 + * Pseudo action: dlm-clone_stop_0 + * Pseudo action: dlm_stop_0 + * Pseudo 
action: dlm-clone_stopped_0
+
+Revised cluster status:
+Online: [ kiff-02 ]
+OFFLINE: [ kiff-01 ]
+GuestOnline: [ lxc-01_kiff-01:R-lxc-01_kiff-01 lxc-01_kiff-02:R-lxc-01_kiff-02 lxc-02_kiff-01:R-lxc-02_kiff-01 lxc-02_kiff-02:R-lxc-02_kiff-02 ]
+
+ vm-fs  (ocf::heartbeat:Filesystem):    Started lxc-01_kiff-01
+ R-lxc-01_kiff-02   (ocf::heartbeat:VirtualDomain): Started kiff-02
+ fence-kiff-01  (stonith:fence_ipmilan):    Started kiff-02
+ fence-kiff-02  (stonith:fence_ipmilan):    Started kiff-02
+ Clone Set: dlm-clone [dlm]
+     Started: [ kiff-02 ]
+     Stopped: [ kiff-01 lxc-01_kiff-01 lxc-01_kiff-02 lxc-02_kiff-01 lxc-02_kiff-02 ]
+ Clone Set: clvmd-clone [clvmd]
+     Started: [ kiff-02 ]
+     Stopped: [ kiff-01 lxc-01_kiff-01 lxc-01_kiff-02 lxc-02_kiff-01 lxc-02_kiff-02 ]
+ Clone Set: shared0-clone [shared0]
+     Started: [ kiff-02 ]
+     Stopped: [ kiff-01 lxc-01_kiff-01 lxc-01_kiff-02 lxc-02_kiff-01 lxc-02_kiff-02 ]
+ R-lxc-01_kiff-01   (ocf::heartbeat:VirtualDomain): Started kiff-02
+ R-lxc-02_kiff-01   (ocf::heartbeat:VirtualDomain): Started kiff-02
+ R-lxc-02_kiff-02   (ocf::heartbeat:VirtualDomain): Started kiff-02
+
diff --git a/cts/scheduler/priority-fencing-delay.xml b/cts/scheduler/priority-fencing-delay.xml
new file mode 100644
index 00000000000..04d1d3e2904
--- /dev/null
+++ b/cts/scheduler/priority-fencing-delay.xml
@@ -0,0 +1,351 @@
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index c93efb3dcdc..97fd4ad9287 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -815,6 +815,7 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
     const char *target = NULL;
     const char *type = NULL;
     char *transition_key = NULL;
+    const char *priority_delay = NULL;
     gboolean invalid_action = FALSE;
     enum stonith_call_options options = st_opt_none;
@@ -833,9 +834,11 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
         return FALSE;
     }
 
+    priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
+
     crm_notice("Requesting fencing (%s) of node %s "
-               CRM_XS " action=%s timeout=%u",
-               type, target, id, transition_graph->stonith_timeout);
+               CRM_XS " action=%s timeout=%u priority_delay=%s",
+               type, target, id, transition_graph->stonith_timeout, priority_delay);
 
     /* Passing NULL means block until we can connect...
*/ te_connect_stonith(NULL); @@ -844,9 +847,9 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action) options |= st_opt_allow_suicide; } - rc = stonith_api->cmds->fence(stonith_api, options, target, type, - (int) (transition_graph->stonith_timeout / 1000), - 0); + rc = stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, + (int) (transition_graph->stonith_timeout / 1000), + 0, crm_atoi(priority_delay, "-1")); transition_key = pcmk__transition_key(transition_graph->id, action->id, 0, te_uuid), diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index d92dd9a1e0a..aa224c59671 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -78,7 +78,7 @@ typedef struct async_command_s { int default_timeout; /* seconds */ int timeout; /* seconds */ - int start_delay; /* milliseconds */ + int start_delay; /* seconds */ int delay_id; char *op; @@ -121,7 +121,7 @@ static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; - int delay_max_ms = 0; + int delay_max = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; @@ -129,17 +129,17 @@ get_action_delay_max(stonith_device_t * device, const char * action) value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX); if (value) { - delay_max_ms = crm_get_msec(value); + delay_max = crm_parse_interval_spec(value) / 1000; } - return delay_max_ms; + return delay_max; } static int get_action_delay_base(stonith_device_t * device, const char * action) { const char *value = NULL; - int delay_base_ms = 0; + int delay_base = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; @@ -147,10 +147,10 @@ get_action_delay_base(stonith_device_t * device, const char * action) value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE); if (value) { - delay_base_ms = crm_get_msec(value); + delay_base = crm_parse_interval_spec(value) / 1000; } - return delay_base_ms; + return delay_base; } /*! @@ -241,6 +241,9 @@ create_async_command(xmlNode * msg) crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; + // Default value -1 means no enforced fencing delay + cmd->start_delay = -1; + crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); @@ -347,7 +350,7 @@ stonith_device_execute(stonith_device_t * device) if (pending_op && pending_op->delay_id) { crm_trace - ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms", + ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds", pending_op->action, pending_op->victim ? " targeting " : "", pending_op->victim ? 
pending_op->victim : "", device->id, pending_op->start_delay); @@ -462,6 +465,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; + bool delay_enforced = (cmd->start_delay >= 0); CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); @@ -494,30 +498,37 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); - delay_max = get_action_delay_max(device, cmd->action); - delay_base = get_action_delay_base(device, cmd->action); - if (delay_max == 0) { - delay_max = delay_base; - } - if (delay_max < delay_base) { - crm_warn("Base-delay (%dms) is larger than max-delay (%dms) " - "for %s on %s - limiting to max-delay", - delay_base, delay_max, cmd->action, device->id); - delay_base = delay_max; + // No enforced fencing delay + if (delay_enforced == FALSE) { + delay_max = get_action_delay_max(device, cmd->action); + delay_base = get_action_delay_base(device, cmd->action); + if (delay_max == 0) { + delay_max = delay_base; + } + if (delay_max < delay_base) { + crm_warn("Base-delay (%ds) is larger than max-delay (%ds) " + "for %s on %s - limiting to max-delay", + delay_base, delay_max, cmd->action, device->id); + delay_base = delay_max; + } + if (delay_max > 0) { + // coverity[dont_call] We're not using rand() for security + cmd->start_delay = + ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + + delay_base; + } } - if (delay_max > 0) { - // coverity[dont_call] We're not using rand() for security - cmd->start_delay = - ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) - + delay_base; - crm_notice("Delaying '%s' action%s%s on %s for %dms (timeout=%ds, base=%dms, " - "max=%dms)", - cmd->action, - cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", - device->id, cmd->start_delay, cmd->timeout, - delay_base, delay_max); + + if (cmd->start_delay > 0) { + crm_notice("Delaying '%s' action%s%s on %s for %s%ds (timeout=%ds, base=%ds, " + "max=%ds)", + cmd->action, + cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", + device->id, delay_enforced ? "enforced " : "", + cmd->start_delay, cmd->timeout, + delay_base, delay_max); cmd->delay_id = - g_timeout_add(cmd->start_delay, start_delay_helper, cmd); + g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 11125d73602..ac9bcb06923 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -842,6 +842,11 @@ stonith_topology_next(remote_fencing_op_t * op) op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); + // The enforced delay has been applied for the first fencing level + if (op->level > 1 && op->delay > 0) { + op->delay = 0; + } + if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) { /* A reboot has been requested for a topology level with multiple * devices. 
Instead of rebooting the devices sequentially, we will
@@ -1000,6 +1005,10 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
 
     crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
 
+    // Default value -1 means no enforced fencing delay
+    op->delay = -1;
+    crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
+
     if (peer && dev) {
         op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
     } else {
@@ -1448,6 +1457,12 @@ advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
         /* Necessary devices remain, so execute the next one */
         crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
                   op->target, op->originator, op->client_name, rc);
+
+        // The enforced delay has been applied for the first device
+        if (op->delay > 0) {
+            op->delay = 0;
+        }
+
         call_remote_stonith(op, NULL);
     } else {
         /* We're done with all devices and phases, so finalize operation */
@@ -1503,6 +1518,10 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
     crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
     crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
 
+    if (op->delay >= 0) {
+        crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
+    }
+
     if (device) {
         timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device);
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index cc7f9105c5d..73cd28e340f 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -113,6 +113,10 @@ typedef struct remote_fencing_op_s {
      *  values associated with the devices this fencing operation may call */
     gint total_timeout;
 
+    /*! Enforced fencing delay.
+     *  Default value -1 means no enforced fencing delay. */
+    int delay;
+
     /*! Delegate is the node being asked to perform a fencing action
      * on behalf of the node that owns the remote operation. Some operations
      * will involve multiple delegates. This value represents the final delegate
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
index 35856aaf649..a6b4fdd2eb3 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
@@ -292,6 +292,22 @@ are +stop+ to attempt to immediately stop pacemaker and stay stopped, or
 on failure. The default is likely to be changed to +panic+ in a future release.
 '(since 2.0.3)'
 
+| priority-fencing-delay | |
+indexterm:[priority-fencing-delay,Cluster Option]
+indexterm:[Cluster,Option,priority-fencing-delay]
+Enforce the specified fencing delay when targeting a lost node that has
+the highest total resource priority, in case our cluster partition does
+not hold the majority of nodes. This gives the more significant node a
+head start, so that it is more likely to win any fencing match, which is
+especially meaningful during split-brain of a two-node cluster. When
+calculating a node's total priority, a promoted resource instance counts
+as its base priority + 1, unless the base priority is 0. If all nodes
+have equal priority, any pcmk_delay_base/max configured for the
+corresponding fencing resources is applied. Otherwise, as long as this
+option is set, even to 0, it takes precedence over any configured
+pcmk_delay_base/max. Priority fencing delay is disabled by default.
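
The priority calculation described in the option text above can be restated as a small sketch. This is not the scheduler's implementation; the rsc_summary type and node_total_priority() helper below are hypothetical and exist only to illustrate the documented rule: a node's total priority is the sum of the priorities of the resources active on it, with a promoted instance counted as its base priority + 1 when that base priority is nonzero.

    #include <stddef.h>

    /* Hypothetical helper type, for illustration only */
    struct rsc_summary {
        int base_priority;  /* the resource's configured priority */
        int promoted;       /* nonzero if this instance is promoted */
    };

    /* Sketch of the documented rule: sum the priorities of the resources
     * active on a node; a promoted instance counts as base priority + 1
     * unless its base priority is 0. */
    static int
    node_total_priority(const struct rsc_summary *active, size_t n_active)
    {
        int total = 0;

        for (size_t i = 0; i < n_active; i++) {
            int p = active[i].base_priority;

            if (active[i].promoted && (p != 0)) {
                p += 1;
            }
            total += p;
        }
        return total;
    }

In a two-node split-brain, the partition whose node has the lower total sees its fencing request against the higher-priority peer delayed by this amount, giving the more significant node time to win the fencing match.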
+ | cluster-delay | 60s | indexterm:[cluster-delay,Cluster Option] indexterm:[Cluster,Option,cluster-delay] diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index 82389b4181d..3f2c0906025 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -68,6 +68,7 @@ stonith_history_t *stonith__sort_history(stonith_history_t *history); /*! Timeout period per a device execution */ # define F_STONITH_TIMEOUT "st_timeout" # define F_STONITH_TOLERANCE "st_tolerance" +# define F_STONITH_DELAY "st_delay" /*! Action specific timeout period returned in query of fencing devices. */ # define F_STONITH_ACTION_TIMEOUT "st_action_timeout" /*! Host in query result is not allowed to run this action */ diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index d0cdf6cd3d0..af3f33e883e 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -348,6 +348,7 @@ extern "C" { # define XML_CONFIG_ATTR_FENCE_REACTION "fence-reaction" # define XML_CONFIG_ATTR_SHUTDOWN_LOCK "shutdown-lock" # define XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT "shutdown-lock-limit" +# define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY "priority-fencing-delay" # define XML_ALERT_ATTR_PATH "path" # define XML_ALERT_ATTR_TIMEOUT "timeout" diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h index 6fcb63ef566..b4ce1574cd2 100644 --- a/include/crm/pengine/internal.h +++ b/include/crm/pengine/internal.h @@ -378,7 +378,7 @@ typedef struct op_digest_cache_s { op_digest_cache_t *rsc_action_digest_cmp(pe_resource_t * rsc, xmlNode * xml_op, pe_node_t * node, pe_working_set_t * data_set); -pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set); +pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set); void trigger_unfencing( pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set); @@ -396,7 +396,7 @@ gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options, void * print_data, gboolean print_all); int pe__rscs_brief_output(pcmk__output_t *out, GListPtr rsc_list, long options, gboolean print_all); -void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason); +void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay); pe_node_t *pe_create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); diff --git a/include/crm/pengine/pe_types.h b/include/crm/pengine/pe_types.h index a6060e59cf3..a8974f8fd0c 100644 --- a/include/crm/pengine/pe_types.h +++ b/include/crm/pengine/pe_types.h @@ -176,6 +176,7 @@ struct pe_working_set_s { time_t recheck_by; // Hint to controller to re-run scheduler by this time int ninstances; // Total number of resource instances guint shutdown_lock;// How long (seconds) to lock resources to shutdown node + int priority_fencing_delay; // Enforced priority fencing delay }; enum pe_check_parameters { @@ -220,6 +221,7 @@ struct pe_node_shared_s { GHashTable *attrs; /* char* => char* */ GHashTable *utilization; GHashTable *digest_cache; //!< cache of calculated resource digests + int priority; // calculated based on the priority of resources running on the node }; struct pe_node_s { diff --git a/include/crm/stonith-ng.h 
b/include/crm/stonith-ng.h index 3637bc7aa0d..06812e7675f 100644 --- a/include/crm/stonith-ng.h +++ b/include/crm/stonith-ng.h @@ -400,6 +400,25 @@ typedef struct stonith_api_operations_s stonith_key_value_t *params, int timeout, char **output, char **error_output); + /*! + * \brief Issue a fencing action against a node with enforced fencing delay. + * + * \note Possible actions are, 'on', 'off', and 'reboot'. + * + * \param st, stonith connection + * \param options, call options + * \param node, The target node to fence + * \param action, The fencing action to take + * \param timeout, The default per device timeout to use with each device + * capable of fencing the target. + * \param delay, Any enforced fencing delay. -1 to disable + * + * \retval 0 success + * \retval negative error code on failure. + */ + int (*fence_with_delay)(stonith_t *st, int options, const char *node, const char *action, + int timeout, int tolerance, int delay); + } stonith_api_operations_t; struct stonith_s diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h index 6f699e27eca..e091a171045 100644 --- a/include/pcmki/pcmki_fence.h +++ b/include/pcmki/pcmki_fence.h @@ -26,11 +26,13 @@ * \param[in] tolerance If a successful action for \p target happened within * this many ms, return 0 without performing the action * again. + * \param[in] delay Enforce a fencing delay. Value -1 means disabled. * * \return Standard Pacemaker return code */ int pcmk__fence_action(stonith_t *st, const char *target, const char *action, - const char *name, unsigned int timeout, unsigned int tolerance); + const char *name, unsigned int timeout, unsigned int tolerance, + int delay); /*! * \brief List the fencing operations that have occurred for a specific node. diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 025944136c8..d76b956bf4f 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1069,8 +1069,8 @@ stonith_api_status(stonith_t * stonith, int call_options, const char *id, const } static int -stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, - int timeout, int tolerance) +stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, + const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; @@ -1081,12 +1081,24 @@ stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); + if (delay >= 0) { + crm_xml_add_int(data, F_STONITH_DELAY, delay); + } + rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } +static int +stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, + int timeout, int tolerance) +{ + return stonith_api_fence_with_delay(stonith, call_options, node, action, + timeout, tolerance, -1); +} + static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { @@ -1863,6 +1875,14 @@ stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNod crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); + if (data) { + const char *delay_s = crm_element_value(data, F_STONITH_DELAY); + + if (delay_s) { + crm_xml_add(op_msg, F_STONITH_DELAY, delay_s); + } + } + { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; @@ -2117,6 +2137,7 @@ 
stonith_api_new(void) new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; + new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c index 86e5895ee6e..5e122bc73ce 100644 --- a/lib/pacemaker/pcmk_fence.c +++ b/lib/pacemaker/pcmk_fence.c @@ -30,6 +30,7 @@ static struct { char *name; unsigned int timeout; unsigned int tolerance; + int delay; int rc; } async_fence_data; @@ -109,12 +110,13 @@ async_fence_helper(gpointer user_data) st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback); - call_id = st->cmds->fence(st, - st_opt_allow_suicide, - async_fence_data.target, - async_fence_data.action, - async_fence_data.timeout/1000, - async_fence_data.tolerance/1000); + call_id = st->cmds->fence_with_delay(st, + st_opt_allow_suicide, + async_fence_data.target, + async_fence_data.action, + async_fence_data.timeout/1000, + async_fence_data.tolerance/1000, + async_fence_data.delay); if (call_id < 0) { g_main_loop_quit(mainloop); @@ -131,7 +133,8 @@ async_fence_helper(gpointer user_data) int pcmk__fence_action(stonith_t *st, const char *target, const char *action, - const char *name, unsigned int timeout, unsigned int tolerance) + const char *name, unsigned int timeout, unsigned int tolerance, + int delay) { crm_trigger_t *trig; @@ -141,6 +144,7 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action, async_fence_data.action = action; async_fence_data.timeout = timeout; async_fence_data.tolerance = tolerance; + async_fence_data.delay = delay; async_fence_data.rc = pcmk_err_generic; trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL); @@ -157,8 +161,10 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action, #ifdef BUILD_PUBLIC_LIBPACEMAKER int pcmk_fence_action(stonith_t *st, const char *target, const char *action, - const char *name, unsigned int timeout, unsigned int tolerance) { - return pcmk__fence_action(st, target, action, name, timeout, tolerance); + const char *name, unsigned int timeout, unsigned int tolerance, + int delay) +{ + return pcmk__fence_action(st, target, action, name, timeout, tolerance, delay); } #endif diff --git a/lib/pacemaker/pcmk_sched_allocate.c b/lib/pacemaker/pcmk_sched_allocate.c index 5a39f79f8eb..156f9668a54 100644 --- a/lib/pacemaker/pcmk_sched_allocate.c +++ b/lib/pacemaker/pcmk_sched_allocate.c @@ -918,7 +918,7 @@ probe_resources(pe_working_set_t * data_set) if (pe__is_remote_node(node) && node->details->remote_rsc && (get_remote_node_state(node) == remote_state_failed)) { - pe_fence_node(data_set, node, "the connection is unrecoverable"); + pe_fence_node(data_set, node, "the connection is unrecoverable", FALSE); } continue; @@ -1556,7 +1556,7 @@ fence_guest(pe_node_t *node, pe_working_set_t *data_set) /* Create a fence pseudo-event, so we have an event to order actions * against, and the controller can always detect it. 
*/ - stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", data_set); + stonith_op = pe_fence_op(node, fence_action, FALSE, "guest is unclean", FALSE, data_set); update_action_flags(stonith_op, pe_action_pseudo | pe_action_runnable, __FUNCTION__, __LINE__); @@ -1565,7 +1565,7 @@ fence_guest(pe_node_t *node, pe_working_set_t *data_set) * (even though start might be closer to what is done for a real reboot). */ if(stop && is_set(stop->flags, pe_action_pseudo)) { - pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, data_set); + pe_action_t *parent_stonith_op = pe_fence_op(stop->node, NULL, FALSE, NULL, FALSE, data_set); crm_info("Implying guest node %s is down (action %d) after %s fencing", node->details->uname, stonith_op->id, stop->node->details->uname); order_actions(parent_stonith_op, stonith_op, @@ -1656,7 +1656,7 @@ stage6(pe_working_set_t * data_set) if (node->details->unclean && need_stonith && pe_can_fence(data_set, node)) { - stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", data_set); + stonith_op = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, data_set); pe_warn("Scheduling Node %s for STONITH", node->details->uname); pcmk__order_vs_fence(stonith_op, data_set); @@ -1957,7 +1957,7 @@ apply_container_ordering(pe_action_t *action, pe_working_set_t *data_set) CRM_ASSERT(container); if(is_set(container->flags, pe_rsc_failed)) { - pe_fence_node(data_set, action->node, "container failed"); + pe_fence_node(data_set, action->node, "container failed", FALSE); } crm_trace("Order %s action %s relative to %s%s for %s%s", @@ -2164,7 +2164,7 @@ apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set) * to the remote connection, since the stop will become implied * by the fencing. */ - pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable"); + pe_fence_node(data_set, action->node, "resources are active and the connection is unrecoverable", FALSE); } else if(remote_rsc->next_role == RSC_ROLE_STOPPED) { /* State must be remote_state_unknown or remote_state_stopped. @@ -2212,7 +2212,7 @@ apply_remote_ordering(pe_action_t *action, pe_working_set_t *data_set) * Since we have no way to find out, it is * necessary to fence the node. 
              */
-            pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable");
+            pe_fence_node(data_set, action->node, "resources are in an unknown state and the connection is unrecoverable", FALSE);
         }

         if(cluster_node && state == remote_state_stopped) {
diff --git a/lib/pacemaker/pcmk_sched_native.c b/lib/pacemaker/pcmk_sched_native.c
index e63736af358..9ff30287653 100644
--- a/lib/pacemaker/pcmk_sched_native.c
+++ b/lib/pacemaker/pcmk_sched_native.c
@@ -1476,7 +1476,7 @@ native_internal_constraints(pe_resource_t * rsc, pe_working_set_t * data_set)
         for (GList *item = allowed_nodes; item; item = item->next) {
             pe_node_t *node = item->data;
-            pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, data_set);
+            pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set);

             crm_debug("Ordering any stops of %s before %s, and any starts after",
                       rsc->id, unfence->uuid);
@@ -1962,7 +1962,7 @@ rsc_ticket_constraint(pe_resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_work
             for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) {
                 pe_node_t *node = (pe_node_t *) gIter->data;

-                pe_fence_node(data_set, node, "deadman ticket was lost");
+                pe_fence_node(data_set, node, "deadman ticket was lost", FALSE);
             }
             break;
@@ -2665,7 +2665,7 @@ StopRsc(pe_resource_t * rsc, pe_node_t * next, gboolean optional, pe_working_set
         }

         if(is_set(rsc->flags, pe_rsc_needs_unfencing)) {
-            pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, data_set);
+            pe_action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, FALSE, data_set);

             order_actions(stop, unfence, pe_order_implies_first);
             if (!node_has_been_unfenced(current)) {
@@ -2695,7 +2695,7 @@ order_after_unfencing(pe_resource_t *rsc, pe_node_t *node, pe_action_t *action,
      * the node being unfenced, and all its resources being stopped,
      * whenever a new resource is added -- which would be highly suboptimal.
      */
-    pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, data_set);
+    pe_action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, FALSE, data_set);

     order_actions(unfence, action, order);
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index b642d40074f..b9a622d42c0 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -158,6 +158,23 @@ static pcmk__cluster_option_t pe_opts[] = {
         "Setting this to false may lead to a \"split-brain\" situation,"
         "potentially leading to data loss and/or service unavailability."
     },
+    {
+        XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
+        NULL, pcmk__valid_interval_spec,
+        "Apply fencing delay targeting the lost nodes with the highest total resource priority",
+        "Apply the specified delay to any fencing targeting the lost "
+        "nodes with the highest total resource priority, if the local "
+        "partition does not hold a majority of the cluster nodes. This "
+        "gives the more significant nodes a head start in any fencing "
+        "race, which matters most in a split-brain of a two-node "
+        "cluster. A promoted resource instance counts as its base "
+        "priority plus 1 if its base priority is not 0. If all nodes "
+        "have equal priority, any pcmk_delay_base/max configured for "
+        "the relevant fencing resources is applied instead. Otherwise, "
+        "whenever this option is set, even to 0, it takes precedence "
+        "over any configured pcmk_delay_base/max. Priority fencing "
+        "delay is disabled by default."
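As an illustration (outside the patch itself), the rules in this description can be restated in a small, self-contained C helper with a worked two-node example. The resource name "db", its priority of 10, and the 15s delay are hypothetical; the scheduler's real accounting is done by native_priority_to_node() and node_priority_fencing_delay() later in this patch.

#include <stdbool.h>

/* Restates the documented rule: a promoted instance counts as its base
 * priority + 1, and a base priority of 0 contributes nothing.
 */
static int
instance_priority(int base_priority, bool promoted)
{
    if (base_priority == 0) {
        return 0;
    }
    return promoted ? base_priority + 1 : base_priority;
}

/* Worked example with priority-fencing-delay=15s and a promotable
 * resource "db" configured with priority=10:
 *
 *   node1 runs the promoted instance:   instance_priority(10, true)  == 11
 *   node2 runs the unpromoted instance: instance_priority(10, false) == 10
 *
 * If the two nodes lose sight of each other, fencing that targets node1
 * (the higher total) is delayed by 15s, giving node1 a head start to fence
 * node2 first; fencing that targets node2 gets no enforced delay. If both
 * totals were equal, any pcmk_delay_base/max on the fence devices would be
 * used instead.
 */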
+    },
     {
         "cluster-delay", NULL, "time", NULL,
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index 8308d88667f..3a6dc24da77 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -34,6 +34,51 @@ is_multiply_active(pe_resource_t *rsc)
     return count > 1;
 }

+static void
+native_priority_to_node(pe_resource_t * rsc, pe_node_t * node)
+{
+    int priority = 0;
+
+    if (rsc->priority == 0) {
+        return;
+    }
+
+    if (rsc->role == RSC_ROLE_MASTER) {
+        // Promoted instance takes base priority + 1
+        priority = rsc->priority + 1;
+
+    } else {
+        priority = rsc->priority;
+    }
+
+    node->details->priority += priority;
+    pe_rsc_trace(rsc, "Node '%s' now has priority %d with %s'%s' (priority: %d%s)",
+                 node->details->uname, node->details->priority,
+                 rsc->role == RSC_ROLE_MASTER ? "promoted " : "",
+                 rsc->id, rsc->priority,
+                 rsc->role == RSC_ROLE_MASTER ? " + 1" : "");
+
+    /* Priority of a resource running on a guest node is added to the cluster
+     * node as well. */
+    if (node->details->remote_rsc
+        && node->details->remote_rsc->container) {
+        GListPtr gIter = node->details->remote_rsc->container->running_on;
+
+        for (; gIter != NULL; gIter = gIter->next) {
+            pe_node_t *a_node = gIter->data;
+
+            a_node->details->priority += priority;
+            pe_rsc_trace(rsc, "Node '%s' now has priority %d with %s'%s' (priority: %d%s) "
+                         "from guest node '%s'",
+                         a_node->details->uname, a_node->details->priority,
+                         rsc->role == RSC_ROLE_MASTER ? "promoted " : "",
+                         rsc->id, rsc->priority,
+                         rsc->role == RSC_ROLE_MASTER ? " + 1" : "",
+                         node->details->uname);
+        }
+    }
+}
+
 void
 native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set)
 {
@@ -55,6 +100,8 @@ native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * dat
     rsc->running_on = g_list_append(rsc->running_on, node);
     if (rsc->variant == pe_native) {
         node->details->running_rsc = g_list_append(node->details->running_rsc, rsc);
+
+        native_priority_to_node(rsc, node);
     }

     if (rsc->variant == pe_native && node->details->maintenance) {
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 11bbd08a008..fee6b9938cd 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -74,9 +74,11 @@ is_dangling_guest_node(pe_node_t *node)
  * \param[in,out] data_set       Current working set of cluster
  * \param[in,out] node           Node to fence
  * \param[in]     reason         Text description of why fencing is needed
+ * \param[in]     priority_delay Whether to consider `priority-fencing-delay`
  */
 void
-pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason)
+pe_fence_node(pe_working_set_t * data_set, pe_node_t * node,
+              const char *reason, bool priority_delay)
 {
     CRM_CHECK(node, return);

@@ -126,7 +128,8 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason)
                      reason);
         }
         node->details->unclean = TRUE;
-        pe_fence_op(node, NULL, TRUE, reason, data_set);
+        // No need to apply `priority-fencing-delay` for remote nodes
+        pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set);

     } else if (node->details->unclean) {
         crm_trace("Cluster node %s %s because %s",
@@ -140,7 +143,7 @@ pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason)
                  pe_can_fence(data_set, node)?
"will be fenced" : "is unclean", reason); node->details->unclean = TRUE; - pe_fence_op(node, NULL, TRUE, reason, data_set); + pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set); } } @@ -225,6 +228,15 @@ unpack_config(xmlNode * config, pe_working_set_t * data_set) crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); + // Default value -1 means `priority-fencing-delay` is disabled + data_set->priority_fencing_delay = -1; + value = pe_pref(data_set->config_hash, + XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); + if (value) { + data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000; + crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay); + } + set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false"); @@ -1157,7 +1169,7 @@ unpack_status(xmlNode * status, pe_working_set_t * data_set) * (at least until the scheduler becomes able to migrate off * healthy resources) */ - pe_fence_node(data_set, this_node, "cluster does not have quorum"); + pe_fence_node(data_set, this_node, "cluster does not have quorum", FALSE); } } } @@ -1229,7 +1241,7 @@ determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_s } else { /* mark it unclean */ - pe_fence_node(data_set, this_node, "peer is unexpectedly down"); + pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } @@ -1285,10 +1297,10 @@ determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_stat online = crmd_online; } else if (in_cluster == NULL) { - pe_fence_node(data_set, this_node, "peer has not been seen by the cluster"); + pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { - pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria"); + pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria", FALSE); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { @@ -1307,14 +1319,15 @@ determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_stat online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { - pe_fence_node(data_set, this_node, "peer is no longer part of the cluster"); + // Consider `priority-fencing-delay` for lost nodes + pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE); } else if (!crmd_online) { - pe_fence_node(data_set, this_node, "peer process is no longer available"); + pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (do_terminate) { - pe_fence_node(data_set, this_node, "termination was requested"); + pe_fence_node(data_set, this_node, "termination was requested", FALSE); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); @@ -1326,7 +1339,7 @@ determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_stat this_node->details->pending = TRUE; } else { - pe_fence_node(data_set, this_node, "peer was in an unknown state"); + pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE); crm_warn("%s: 
 in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
                  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
                  crm_str(join), crm_str(exp_state), do_terminate,
                  this_node->details->shutdown);
@@ -1908,7 +1921,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node,
             if (reason == NULL) {
                 reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
             }
-            pe_fence_node(data_set, node, reason);
+            pe_fence_node(data_set, node, reason, FALSE);
         }
         free(reason);
     }
@@ -1930,7 +1943,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node,
              * but also mark the node as unclean
              */
            reason = crm_strdup_printf("%s failed there", rsc->id);
-           pe_fence_node(data_set, node, reason);
+           pe_fence_node(data_set, node, reason, FALSE);
            free(reason);
            break;
@@ -1998,7 +2011,7 @@ process_rsc_state(pe_resource_t * rsc, pe_node_t * node,
                  * should result in fencing the remote node.
                  */
                 pe_fence_node(data_set, tmpnode,
-                              "remote connection is unrecoverable");
+                              "remote connection is unrecoverable", FALSE);
             }
         }
@@ -3079,7 +3092,7 @@ order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn,
     if (remote_node) {
         pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
-                                         data_set);
+                                         FALSE, data_set);

         order_actions(fence, action, pe_order_implies_then);
     }
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 2e72dcddb87..9a8c2f0ac79 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -644,7 +644,7 @@ custom_action(pe_resource_t * rsc, char *key, const char *task,
             if (is_set(action->rsc->flags, pe_rsc_managed)
                 && save_action && a_task == stop_rsc
                 && action->node->details->unclean == FALSE) {
-                pe_fence_node(data_set, action->node, "resource actions are unrunnable");
+                pe_fence_node(data_set, action->node, "resource actions are unrunnable", FALSE);
             }

         } else if (is_not_set(action->flags, pe_action_dc)
@@ -2343,9 +2343,76 @@ find_unfencing_devices(GListPtr candidates, GListPtr matches)
     return matches;
 }

+static int
+node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set)
+{
+    int member_count = 0;
+    int online_count = 0;
+    int top_priority = 0;
+    int lowest_priority = 0;
+    GListPtr gIter = NULL;
+
+    // `priority-fencing-delay` is disabled
+    if (data_set->priority_fencing_delay < 0) {
+        return -1;
+    }
+
+    /* No need to delay fencing if the fencing target is not a normal cluster
+     * member, for example if it's a remote node or a guest node. */
+    if (node->details->type != node_member) {
+        return 0;
+    }
+
+    // No need to delay fencing if the fencing target is in our partition
+    if (node->details->online) {
+        return 0;
+    }
+
+    for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
+        pe_node_t *n = gIter->data;
+
+        if (n->details->type != node_member) {
+            continue;
+        }
+
+        member_count ++;
+
+        if (n->details->online) {
+            online_count++;
+        }
+
+        if (member_count == 1
+            || n->details->priority > top_priority) {
+            top_priority = n->details->priority;
+        }
+
+        if (member_count == 1
+            || n->details->priority < lowest_priority) {
+            lowest_priority = n->details->priority;
+        }
+    }
+
+    // No need to delay if we have more than half of the cluster members
+    if (online_count > member_count / 2) {
+        return 0;
+    }
+
+    /* All the nodes have equal priority.
+     * Any configured corresponding `pcmk_delay_base/max` will be applied.
+     */
+    if (lowest_priority == top_priority) {
+        return -1;
+    }
+
+    if (node->details->priority < top_priority) {
+        return 0;
+    }
+
+    return data_set->priority_fencing_delay;
+}

 pe_action_t *
-pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set)
+pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason,
+            bool priority_delay, pe_working_set_t * data_set)
 {
     char *op_key = NULL;
     pe_action_t *stonith_op = NULL;
@@ -2417,6 +2484,29 @@ pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason,
         free(op_key);
     }

+    if (data_set->priority_fencing_delay >= 0
+
+        /* The caller marked this as a case where `priority-fencing-delay`
+         * applies, so at least add the `priority-fencing-delay` field as an
+         * indicator. */
+        && (priority_delay
+
+            /* Re-calculate the priority delay when pe_fence_op() is called
+             * again by stage6(), after node priorities have actually been
+             * accumulated by native_add_running(). */
+            || g_hash_table_lookup(stonith_op->meta,
+                                   XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) {

+        /* Add `priority-fencing-delay` to the fencing op even if it is 0 for
+         * the target node, so that it takes precedence over any possible
+         * `pcmk_delay_base/max`.
+         */
+        char *delay_s = crm_itoa(node_priority_fencing_delay(node, data_set));
+
+        g_hash_table_insert(stonith_op->meta,
+                            strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY),
+                            delay_s);
+    }
+
     if(optional == FALSE && pe_can_fence(data_set, node)) {
         pe_action_required(stonith_op, NULL, reason);
     } else if(reason && stonith_op->reason == NULL) {
@@ -2442,7 +2532,7 @@ trigger_unfencing(
        && node->details->online
        && node->details->unclean == FALSE
        && node->details->shutdown == FALSE) {
-        pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, data_set);
+        pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set);

         if(dependency) {
             order_actions(unfence, dependency, pe_order_optional);
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
index 87b538ac94c..f98e335a177 100644
--- a/tools/stonith_admin.c
+++ b/tools/stonith_admin.c
@@ -54,6 +54,7 @@ struct {
     int fence_level;
     int timeout ;
     int tolerance;
+    int delay;
     char *agent;
     char *confirm_host;
     char *fence_host;
@@ -69,7 +70,8 @@ struct {
     char *unregister_dev;
     char *unregister_level;
 } options = {
-    .timeout = 120
+    .timeout = 120,
+    .delay = -1
 };

 gboolean add_env_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
@@ -205,6 +207,10 @@ static GOptionEntry addl_entries[] = {
       "Operation timeout in seconds (default 120;\n"
       INDENT "used with most commands).",
       "SECONDS" },
+    { "delay", 'y', 0, G_OPTION_ARG_INT, &options.delay,
+      "Enforced fencing delay in seconds (default -1, disabled;\n"
+      INDENT "used with --fence, --reboot, --unfence).",
+      "SECONDS" },
     { "as-node-id", 'n', 0, G_OPTION_ARG_NONE, &options.as_nodeid,
       "(Advanced) The supplied node is the corosync node ID\n"
       INDENT "(with --last).",
@@ -566,17 +572,17 @@ main(int argc, char **argv)
         case 'B':
             rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
-                                    options.tolerance*1000);
+                                    options.tolerance*1000, options.delay);
             break;

         case 'F':
             rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
-                                    options.tolerance*1000);
+                                    options.tolerance*1000, options.delay);
             break;

         case 'U':
             rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
-                                    options.tolerance*1000);
+                                    options.tolerance*1000, options.delay);
             break;

         case 'h':
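To close the loop on the command-line side: with the new `--delay` option above, a request such as `stonith_admin --fence node3 --delay 10` (node name and delay chosen arbitrarily) reaches the 'F' case and flows into pcmk__fence_action() with the delay passed through unchanged. The sketch below (outside the patch itself) shows the same call made directly, assuming an already connected fencer handle; note that timeout and tolerance are in milliseconds at this layer while the delay stays in seconds, matching the tool code.

#include <crm/stonith-ng.h>
#include <pcmki/pcmki_fence.h>

/* Hypothetical direct equivalent of `stonith_admin --fence node3 --delay 10`:
 * target, action, client name, timeout (ms), tolerance (ms), delay (s).
 */
static int
fence_like_the_tool(stonith_t *st)
{
    return pcmk__fence_action(st, "node3", "off", "stonith_admin-example",
                              120 * 1000, 0, 10);
}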