Skip to content

Commit d091404

Browse files
authored
Fixes to onboard x86_64 servers in the baremetal qe infra (#36487)
* Support Dell IPMI power commands

  On Dell servers, `ipmi power (off|on|reset)` returns an error when the host is in a state that doesn't allow the requested transition. Replace `power reset` with two commands (off + on), and ignore any power-off errors so that these state-validation failures don't abort the step.

* Set the EFI boot order after installing RHCOS in UPI/UEFI/PXE scenarios

  Some servers' firmware pushes any newly detected boot option to the tail of the boot order. When other boot options are present and bootable, such a server will boot from them instead of the new one. As a (temporary?) workaround, we manually add the boot option.

  NOTE: it's assumed that old OSes' boot options are removed from the boot options list during the wipe operations.

  xrefs:
  - https://bugzilla.redhat.com/show_bug.cgi?id=1997805
  - coreos/fedora-coreos-tracker#946
  - coreos/fedora-coreos-tracker#947
1 parent 22eeef2 commit d091404

File tree

4 files changed

+41
-3
lines changed

4 files changed

+41
-3
lines changed

ci-operator/step-registry/baremetal/lab/ipi/install/baremetal-lab-ipi-install-commands.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ function prepare_bmc() {
3131
chassis bootparam set bootflag force_pxe options=PEF,watchdog,reset,power
3232
ipmitool -I lanplus -H "$bmc_address" \
3333
-U "$bmc_user" -P "$bmc_pass" \
34-
power off
34+
power off || echo "Already off"
3535
}
3636

3737
function update_image_registry() {

ci-operator/step-registry/baremetal/lab/post/wipe/baremetal-lab-post-wipe-commands.sh

+15-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,21 @@ function reset_host() {
6161
chassis bootparam set bootflag force_pxe options=PEF,watchdog,reset,power
6262
ipmitool -I lanplus -H "$bmc_address" \
6363
-U "$bmc_user" -P "$bmc_pass" \
64-
power reset
64+
power off || echo "Already off"
65+
# If the host is not already powered off, the power on command can fail while the host is still powering off.
66+
# Let's retry the power on command multiple times to make sure the command is received in the correct state.
67+
for i in {1..10} max; do
68+
if [ "$i" == "max" ]; then
69+
echo "Failed to reset $bmc_address"
70+
return 1
71+
fi
72+
ipmitool -I lanplus -H "$bmc_address" \
73+
-U "$bmc_user" -P "$bmc_pass" \
74+
power on && break
75+
echo "Failed to power on $bmc_address, retrying..."
76+
sleep 5
77+
done
78+
6579
if ! wait_for_power_down "$bmc_address" "$bmc_user" "$bmc_pass" "${name}"; then
6680
echo "$bmc_address" >> /tmp/failed
6781
fi

ci-operator/step-registry/baremetal/lab/pre/console-kargs/baremetal-lab-pre-console-kargs-commands.sh

+11
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@ systemd:
6060
--delete-karg console=ttyS0,115200n8 $(join_by_semicolon "${console_kargs}" "--append-karg console=" "") \
6161
--ignition-url ${base_url%%*(/)}/${role}.ign \
6262
--insecure-ignition --copy-network
63+
# Some servers' firmware pushes any newly detected boot option to the tail of the boot order.
64+
# When other boot options are present and bootable, such a server will boot from them instead of the new one.
65+
# As a (temporary?) workaround, we manually add the boot option.
66+
# NOTE: it's assumed that old OSes' boot options are removed from the boot options list during the wipe operations.
67+
# xrefs: https://bugzilla.redhat.com/show_bug.cgi?id=1997805
68+
# https://github.com/coreos/fedora-coreos-tracker/issues/946
69+
# https://github.com/coreos/fedora-coreos-tracker/issues/947
70+
ExecStart=/usr/bin/bash -c ' \
71+
ARCH=\$(uname -m | sed "s/x86_64/x64/;s/aarch64/aa64/"); \
72+
/usr/sbin/efibootmgr -c -d "$root_device" -p 2 -c -L "Red Hat CoreOS" -l "\\\\EFI\\\\redhat\\\\shim\$ARCH.efi" \
73+
'
6374
ExecStart=/usr/bin/systemctl --no-block reboot
6475
StandardOutput=kmsg+console
6576
StandardError=kmsg+console

ci-operator/step-registry/baremetal/lab/upi/install/baremetal-lab-upi-install-commands.sh

+14-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,20 @@ function reset_host() {
129129
chassis bootparam set bootflag force_pxe options=PEF,watchdog,reset,power
130130
ipmitool -I lanplus -H "$bmc_address" \
131131
-U "$bmc_user" -P "$bmc_pass" \
132-
power reset
132+
power off || echo "Already off"
133+
# If the host is not already powered off, the power on command can fail while the host is still powering off.
134+
# Let's retry the power on command multiple times to make sure the command is received in the correct state.
135+
for i in {1..10} max; do
136+
if [ "$i" == "max" ]; then
137+
echo "Failed to reset $bmc_address"
138+
return 1
139+
fi
140+
ipmitool -I lanplus -H "$bmc_address" \
141+
-U "$bmc_user" -P "$bmc_pass" \
142+
power on && break
143+
echo "Failed to power on $bmc_address, retrying..."
144+
sleep 5
145+
done
133146
}
134147

135148
function approve_csrs() {

0 commit comments

Comments
 (0)