|
| 1 | +--- |
# Ask the orchestrator whether this host may be stopped. The tasks that
# follow read the outcome from cephadm_commands_result (presumably registered
# by the commands role -- confirm).
- name: Check if host can enter maintenance mode
  vars:
    cephadm_commands:
      - 'orch host ok-to-stop {{ cephadm_hostname }}'
  ansible.builtin.include_role:
    name: stackhpc.cephadm.commands
| 8 | + |
# 'ceph orch host ok-to-stop' does not exit non-zero when stopping is unsafe,
# so the verdict has to be read from specific messages on stderr.
- name: Assert that it is safe to stop host
  ansible.builtin.assert:
    # On failure, show the orchestrator's own stderr text as the message.
    fail_msg: '{{ cephadm_commands_result.results[0].stderr }}'
    that:
      # Message seen for monitors
      - >-
        'It is NOT safe' not in
        cephadm_commands_result.results[0].stderr
      # Message seen for OSDs
      - >-
        'unsafe to stop' not in
        cephadm_commands_result.results[0].stderr
| 19 | + |
# Run 'mgr fail' only when the stderr held in cephadm_commands_result reports
# that the active Mgr daemon cannot be stopped.
- name: Fail over Ceph manager
  when: >-
    "Cannot stop active Mgr daemon" in
    cephadm_commands_result.results[0].stderr
  vars:
    cephadm_commands:
      - 'mgr fail'
  ansible.builtin.include_role:
    name: stackhpc.cephadm.commands
| 27 | + |
# A host running a RADOS Gateway service cannot enter maintenance. For hosts
# in the rgws group, drop the rgw label first, then poll until the Ceph
# orchestrator has removed the service from the host.
- name: Stop RADOS Gateway service
  when: >-
    'rgws' in group_names
  block:
    - name: Ensure rgw label has been removed from node
      vars:
        cephadm_commands:
          - 'orch host label rm {{ cephadm_hostname }} rgw'
      ansible.builtin.include_role:
        name: stackhpc.cephadm.commands

    - name: Wait for RADOS Gateway service to stop
      vars:
        cephadm_commands:
          - 'orch ls rgw --format json-pretty'
        # Done once the first listed rgw service reports as many running
        # daemons as its size.
        # NOTE(review): only element [0] of the listing is inspected, and the
        # expression assumes the listing is non-empty -- confirm this holds
        # for deployments with several (or no) rgw services.
        cephadm_commands_until: >-
          {{
          (cephadm_commands_result.stdout | from_json)[0].status.running ==
          (cephadm_commands_result.stdout | from_json)[0].status.size
          }}
        # Presumably the retry count and the delay between attempts as
        # interpreted by the commands role -- confirm units there.
        cephadm_commands_delay: 10
        cephadm_commands_retries: 30
      ansible.builtin.include_role:
        name: stackhpc.cephadm.commands
| 52 | + |
# Put the host into maintenance. The label task sits under `always`, so it
# runs whether or not entering maintenance succeeded.
- name: Ensure host is in maintenance mode
  block:
    - name: Ensure host is in maintenance mode
      vars:
        cephadm_commands:
          - 'orch host maintenance enter {{ cephadm_hostname }}'
      ansible.builtin.include_role:
        name: stackhpc.cephadm.commands
  always:
    # NOTE(review): the rgws guard is attached to this task only, so hosts
    # outside the rgws group still enter maintenance -- confirm this matches
    # the intended nesting.
    - name: Ensure rgw label has been added to node
      when: >-
        'rgws' in group_names
      vars:
        cephadm_commands:
          - 'orch host label add {{ cephadm_hostname }} rgw'
      ansible.builtin.include_role:
        name: stackhpc.cephadm.commands
0 commit comments