From 21f91d0ed98a3f9725e3d4ad5cc387f463c0b7bd Mon Sep 17 00:00:00 2001 From: Urvashi Mohnani Date: Thu, 27 Jun 2024 09:32:05 -0400 Subject: [PATCH] OCB should fail if node is not coreos based If a non-coreos based node, such as RHEL8, is added to a cluster and the user tries to enable on cluster builds, throw an error as that is not supported on non-coreos based nodes. Signed-off-by: Urvashi Mohnani --- pkg/daemon/osrelease/osrelease.go | 7 +++++++ pkg/helpers/helpers.go | 28 +++++++++++++++++++++++++ pkg/operator/sync.go | 34 +++++++++++++++++++++++++++++++ test/e2e/node_os_test.go | 21 ++++++++----------- 4 files changed, 77 insertions(+), 13 deletions(-) diff --git a/pkg/daemon/osrelease/osrelease.go b/pkg/daemon/osrelease/osrelease.go index b87a6df75b..6a9e1aa28c 100644 --- a/pkg/daemon/osrelease/osrelease.go +++ b/pkg/daemon/osrelease/osrelease.go @@ -23,6 +23,13 @@ const ( scos string = "scos" ) +// OS Pretty Names +const ( + RHCOS string = "Red Hat Enterprise Linux CoreOS" + FCOS string = "Fedora Linux" + SCOS string = "CentOS Stream CoreOS" +) + // OperatingSystem is a wrapper around a subset of the os-release fields // and also tracks whether ostree is in use. type OperatingSystem struct { diff --git a/pkg/helpers/helpers.go b/pkg/helpers/helpers.go index 6e26550370..f8c4b71a59 100644 --- a/pkg/helpers/helpers.go +++ b/pkg/helpers/helpers.go @@ -2,10 +2,12 @@ package helpers import ( "fmt" + "strings" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" v1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" + "github.com/openshift/machine-config-operator/pkg/daemon/osrelease" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" corev1listers "k8s.io/client-go/listers/core/v1" @@ -103,6 +105,19 @@ func GetPoolsForNode(mcpLister v1.MachineConfigPoolLister, node *corev1.Node) ([ } } +func GetPoolNamesForNode(mcpLister v1.MachineConfigPoolLister, node *corev1.Node) ([]string, error) { + pools, _, err := GetPoolsForNode(mcpLister, node) + if err != nil { + return nil, err + } + + var names []string + for _, pool := range pools { + names = append(names, pool.Name) + } + return names, nil +} + // isWindows checks if given node is a Windows node or a Linux node func IsWindows(node *corev1.Node) bool { windowsOsValue := "windows" @@ -164,3 +179,16 @@ func ListPools(node *corev1.Node, mcpLister v1.MachineConfigPoolLister) (*mcfgv1 return master, worker, custom, nil } + +// IsCoreOSNode checks whether the pretty name of a node matches any of the +// coreos based image names +func IsCoreOSNode(node *corev1.Node) bool { + validOSImages := []string{osrelease.RHCOS, osrelease.FCOS, osrelease.SCOS} + + for _, img := range validOSImages { + if strings.Contains(node.Status.NodeInfo.OSImage, img) { + return true + } + } + return false +} diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index d9d6f4a40f..4426685f8f 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -50,6 +50,7 @@ import ( ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" templatectrl "github.com/openshift/machine-config-operator/pkg/controller/template" daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" + "github.com/openshift/machine-config-operator/pkg/helpers" "github.com/openshift/machine-config-operator/pkg/server" "github.com/openshift/machine-config-operator/pkg/upgrademonitor" "github.com/openshift/machine-config-operator/pkg/version" @@ -1240,6 +1241,14 @@ func (optr *Operator) reconcileMachineOSBuilder(mob *appsv1.Deployment) error { return fmt.Errorf("could not get layered MachineConfigPools: %w", err) } + // If layeredMCP is found, verify that the node is coreos based as non coreos OS images + // cannot handle layered builds + if len(layeredMCPs) > 0 { + if err := optr.validateLayeredPoolNodes(layeredMCPs); err != nil { + return err + } + } + isRunning, err := optr.isMachineOSBuilderRunning(mob) // An unknown error occurred. Bail out here. if err != nil && !apierrors.IsNotFound(err) { @@ -1272,6 +1281,31 @@ func (optr *Operator) reconcileMachineOSBuilder(mob *appsv1.Deployment) error { return nil } +// Validate that the nodes part of layered pools are coreos based +func (optr *Operator) validateLayeredPoolNodes(layeredMCPs []*mcfgv1.MachineConfigPool) error { + nodes, err := optr.GetAllManagedNodes(layeredMCPs) + if err != nil { + return fmt.Errorf("could not get nodes in layered MachineConfigPools: %w", err) + } + // Error out if any of the nodes in the pool where OCL is enabled is a non-coreos node + errNodes := []error{} + for _, node := range nodes { + if !helpers.IsCoreOSNode(node) { + poolNames, err := helpers.GetPoolNamesForNode(optr.mcpLister, node) + if err != nil { + return err + } + err = fmt.Errorf("non-CoreOS OS %q detected on node %q in MachineConfigPool(s) %q", node.Status.NodeInfo.OSImage, node.Name, poolNames) + errNodes = append(errNodes, err) + } + } + + if len(errNodes) > 0 { + return fmt.Errorf("on-cluster layering is only supported on CoreOS-based nodes: %w", kubeErrs.NewAggregate(errNodes)) + } + return nil +} + // Delete the Machine OS Builder Deployment func (optr *Operator) stopMachineOSBuilderDeployment(name string) error { return optr.kubeClient.AppsV1().Deployments(ctrlcommon.MCONamespace).Delete(context.TODO(), name, metav1.DeleteOptions{}) diff --git a/test/e2e/node_os_test.go b/test/e2e/node_os_test.go index 5d5b90e0bf..f4ad08d3c8 100644 --- a/test/e2e/node_os_test.go +++ b/test/e2e/node_os_test.go @@ -5,6 +5,7 @@ import ( "strings" "testing" + "github.com/openshift/machine-config-operator/pkg/daemon/osrelease" "github.com/openshift/machine-config-operator/test/framework" "github.com/openshift/machine-config-operator/test/helpers" "github.com/stretchr/testify/assert" @@ -13,12 +14,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const ( - rhcos string = "Red Hat Enterprise Linux CoreOS" - scos string = "CentOS Stream CoreOS" - fcos string = "Fedora Linux" -) - // Verifies that the OS on each node is identifiable using the mechanism the // MCD does. This is done to ensure that we get an early warning if the // contents of /etc/os-release or /usr/lib/os-release unexpectedly change. @@ -39,11 +34,11 @@ func TestOSDetection(t *testing.T) { assert.False(t, nodeOSRelease.OS.IsLikeTraditionalRHEL7(), "expected IsLikeTraditionalRHEL7() to be false: %s", nodeOSRelease.EtcContent) switch { - case strings.Contains(nodeOSRelease.EtcContent, rhcos): + case strings.Contains(nodeOSRelease.EtcContent, osrelease.RHCOS): assertRHCOS(t, nodeOSRelease, node) - case strings.Contains(nodeOSRelease.EtcContent, scos): + case strings.Contains(nodeOSRelease.EtcContent, osrelease.SCOS): assertSCOS(t, nodeOSRelease, node) - case strings.Contains(nodeOSRelease.EtcContent, fcos): + case strings.Contains(nodeOSRelease.EtcContent, osrelease.FCOS): assertFCOS(t, nodeOSRelease, node) default: t.Fatalf("unknown OS on node %s detected: %s", node.Name, nodeOSRelease.EtcContent) @@ -60,25 +55,25 @@ func assertRHCOS(t *testing.T, nodeOSRelease helpers.NodeOSRelease, node corev1. rhelVersion := nodeOSRelease.OS.OSRelease().ADDITIONAL_FIELDS["RHEL_VERSION"] if strings.Contains(nodeOSRelease.EtcContent, "RHEL_VERSION=\"8.") { // This pattern intentionally unterminated so it matches all RHCOS 8 versions - t.Logf("Identified %s %s on node %s", rhcos, rhelVersion, node.Name) + t.Logf("Identified %s %s on node %s", osrelease.RHCOS, rhelVersion, node.Name) assert.False(t, nodeOSRelease.OS.IsEL9(), "expected < RHCOS 9.0: %s", nodeOSRelease.EtcContent) } if strings.Contains(nodeOSRelease.EtcContent, "RHEL_VERSION=\"9.") { // This pattern intentionally unterminated so it matches all RHCOS 9 versions - t.Logf("Identified %s %s on node %s", rhcos, rhelVersion, node.Name) + t.Logf("Identified %s %s on node %s", osrelease.RHCOS, rhelVersion, node.Name) assert.True(t, nodeOSRelease.OS.IsEL9(), "expected >= RHCOS 9.0+: %s", nodeOSRelease.EtcContent) } } func assertSCOS(t *testing.T, nodeOSRelease helpers.NodeOSRelease, node corev1.Node) { - t.Logf("Identified %s on node %s", scos, node.Name) + t.Logf("Identified %s on node %s", osrelease.SCOS, node.Name) assert.True(t, nodeOSRelease.OS.IsEL(), "expected IsEL() to be true: %s", nodeOSRelease.EtcContent) assert.True(t, nodeOSRelease.OS.IsEL9(), "expected IsEL9() to be true: %s", nodeOSRelease.EtcContent) assert.False(t, nodeOSRelease.OS.IsFCOS(), "expected IsFCOS() to be false: %s", nodeOSRelease.EtcContent) } func assertFCOS(t *testing.T, nodeOSRelease helpers.NodeOSRelease, node corev1.Node) { - t.Logf("Identified OS %s on node %s", fcos, node.Name) + t.Logf("Identified OS %s on node %s", osrelease.FCOS, node.Name) assert.False(t, nodeOSRelease.OS.IsEL(), "expected IsEL() to be false: %s", nodeOSRelease.EtcContent) assert.False(t, nodeOSRelease.OS.IsEL9(), "expected IsEL9() to be false: %s", nodeOSRelease.EtcContent) assert.False(t, nodeOSRelease.OS.IsSCOS(), "expected IsSCOS() to be false: %s", nodeOSRelease.EtcContent)