diff --git a/cmd/machine-config-operator/start.go b/cmd/machine-config-operator/start.go index 22d7323dea..4a2426c409 100644 --- a/cmd/machine-config-operator/start.go +++ b/cmd/machine-config-operator/start.go @@ -93,6 +93,7 @@ func runStartCmd(_ *cobra.Command, _ []string) { ctrlctx.KubeInformerFactory.Core().V1().Nodes(), ctrlctx.KubeMAOSharedInformer.Core().V1().Secrets(), ctrlctx.ConfigInformerFactory.Config().V1().Images(), + ctrlctx.ConfigInformerFactory.Config().V1().ClusterVersions(), ctrlctx.KubeNamespacedInformerFactory.Core().V1().ServiceAccounts(), ctrlctx.KubeNamespacedInformerFactory.Core().V1().Secrets(), ctrlctx.OpenShiftConfigKubeNamespacedInformerFactory.Core().V1().ConfigMaps(), diff --git a/pkg/apihelpers/apihelpers.go b/pkg/apihelpers/apihelpers.go index bb8ea94de9..208f88b5a9 100644 --- a/pkg/apihelpers/apihelpers.go +++ b/pkg/apihelpers/apihelpers.go @@ -501,3 +501,31 @@ func GetManagedBootImagesWithUpdateDisabled() opv1.ManagedBootImages { func GetManagedBootImagesWithNoConfiguration() opv1.ManagedBootImages { return opv1.ManagedBootImages{} } + +// GetSkewEnforcementStatusAutomaticWithOCPVersion returns a BootImageSkewEnforcementStatus with Automatic mode and the given OCP version. +func GetSkewEnforcementStatusAutomaticWithOCPVersion(ocpVersion string) opv1.BootImageSkewEnforcementStatus { + return opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + OCPVersion: ocpVersion, + }, + } +} + +// GetSkewEnforcementStatusManualWithOCPVersion returns a BootImageSkewEnforcementStatus with Manual mode and the given OCP version. +func GetSkewEnforcementStatusManualWithOCPVersion(ocpVersion string) opv1.BootImageSkewEnforcementStatus { + return opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeOCPVersion, + OCPVersion: ocpVersion, + }, + } +} + +// GetSkewEnforcementStatusNone returns a BootImageSkewEnforcementStatus with None mode. +func GetSkewEnforcementStatusNone() opv1.BootImageSkewEnforcementStatus { + return opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusNone, + } +} diff --git a/pkg/controller/common/constants.go b/pkg/controller/common/constants.go index d3e4c7578c..406a764e96 100644 --- a/pkg/controller/common/constants.go +++ b/pkg/controller/common/constants.go @@ -149,6 +149,10 @@ const ( // NodeSizingEnabledEnvPath is the file path for the node sizing enabled environment file NodeSizingEnabledEnvPath = "/etc/node-sizing-enabled.env" + // Current Boot Image Skew Limits + // Note: Update units in status_test.go when the following are bumped + RHCOSVersionBootImageSkewLimit = "9.2" + OCPVersionBootImageSkewLimit = "4.13.0" ) // Commonly-used MCO ConfigMap names diff --git a/pkg/controller/machine-set-boot-image/cpms_helpers.go b/pkg/controller/machine-set-boot-image/cpms_helpers.go index 48b11f481f..7c63693625 100644 --- a/pkg/controller/machine-set-boot-image/cpms_helpers.go +++ b/pkg/controller/machine-set-boot-image/cpms_helpers.go @@ -277,7 +277,7 @@ func reconcilePlatformCPMS[T any]( configMap *corev1.ConfigMap, arch string, secretClient clientset.Interface, - reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, *T, error), + reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, bool, *T, error), ) (patchRequired bool, newCPMS *machinev1.ControlPlaneMachineSet, err error) { klog.Infof("Reconciling controlplanemachineset %s on %s, with arch %s", cpms.Name, string(infra.Status.PlatformStatus.Type), arch) @@ -294,7 +294,7 @@ func reconcilePlatformCPMS[T any]( } // Reconcile the provider spec - patchRequired, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, cpms.Name, secretClient) + patchRequired, _, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, cpms.Name, secretClient) if err != nil { return false, nil, err } diff --git a/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller.go b/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller.go index f69c029474..8e150d844f 100644 --- a/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller.go +++ b/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller.go @@ -14,6 +14,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + k8sversion "k8s.io/apimachinery/pkg/util/version" coreinformersv1 "k8s.io/client-go/informers/core/v1" clientset "k8s.io/client-go/kubernetes" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" @@ -72,6 +73,7 @@ type Controller struct { // Stats structure for local bookkeeping of machine resources type MachineResourceStats struct { inProgress int + skippedCount int erroredCount int totalCount int } @@ -379,8 +381,10 @@ func (ctrl *Controller) updateMachineConfiguration(oldMC, newMC interface{}) { return } - // Only take action if the there is an actual change in the MachineConfiguration's ManagedBootImagesStatus - if reflect.DeepEqual(oldMachineConfiguration.Status.ManagedBootImagesStatus, newMachineConfiguration.Status.ManagedBootImagesStatus) { + // Only take action if the there is an actual change in the MachineConfiguration's ManagedBootImagesStatus or BootImageSkewEnforcementStatus(when bootimageskewenforcement feature gate is enabled) + if reflect.DeepEqual(oldMachineConfiguration.Status.ManagedBootImagesStatus, newMachineConfiguration.Status.ManagedBootImagesStatus) && + (!ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) || + reflect.DeepEqual(oldMachineConfiguration.Status.BootImageSkewEnforcementStatus, newMachineConfiguration.Status.BootImageSkewEnforcementStatus)) { return } @@ -457,33 +461,78 @@ func (ctrl *Controller) updateConditions(newReason string, syncError error, targ } // Only make an API call if there is an update to the Conditions field if !reflect.DeepEqual(newConditions, mcop.Status.Conditions) { - ctrl.updateMachineConfigurationStatus(mcop, newConditions) + mcop.Status.Conditions = newConditions + ctrl.updateMachineConfigurationStatus(mcop.Status) } } -// updateMachineConfigurationStatus updates the MachineConfiguration status with new conditions -// using retry logic to handle concurrent updates. -func (ctrl *Controller) updateMachineConfigurationStatus(mcop *opv1.MachineConfiguration, newConditions []metav1.Condition) { +// updateClusterBootImage updates the cluster boot image record if the skew enforcement is set to Automatic mode. +func (ctrl *Controller) updateClusterBootImage() { + ctrl.conditionMutex.Lock() + defer ctrl.conditionMutex.Unlock() + mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{}) + if err != nil { + klog.Errorf("error updating cluster boot image record: %s", err) + return + } + // No action to take if not in automatic mode + if mcop.Status.BootImageSkewEnforcementStatus.Mode != opv1.BootImageSkewEnforcementModeStatusAutomatic { + return + } + + // Get OCP version of last boot image update from configmap + configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName) + if err != nil { + klog.Warningf("Failed to get boot images configmap: %v, skipping cluster boot image record update", err) + return + } + + releaseVersion, found := configMap.Data[ctrlcommon.OCPReleaseVersionKey] + if !found { + klog.Warningf("OCP release version not found in boot images configmap, skipping cluster boot image record update") + return + } + + // Parse and extract semantic version (major.minor.patch) for API validation + parsedVersion, err := k8sversion.ParseGeneric(releaseVersion) + if err != nil { + klog.Warningf("Failed to parse release version %q from configmap: %v, skipping cluster boot image record update", releaseVersion, err) + return + } + ocpVersion := fmt.Sprintf("%d.%d.%d", parsedVersion.Major(), parsedVersion.Minor(), parsedVersion.Patch()) + + newBootImageSkewEnforcementStatus := mcop.Status.BootImageSkewEnforcementStatus.DeepCopy() + newBootImageSkewEnforcementStatus.Automatic = opv1.ClusterBootImageAutomatic{ + OCPVersion: ocpVersion, + } + // Only make an API call if there is an update to the skew enforcement status + if !reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, newBootImageSkewEnforcementStatus) { + mcop.Status.BootImageSkewEnforcementStatus = *newBootImageSkewEnforcementStatus + ctrl.updateMachineConfigurationStatus(mcop.Status) + } +} + +// updateMachineConfigurationStatus updates the MachineConfiguration status using retry logic to handle concurrent updates. +func (ctrl *Controller) updateMachineConfigurationStatus(mcopStatus opv1.MachineConfigurationStatus) { // Using a retry here as there may be concurrent reconiliation loops updating conditions for multiple // resources at the same time and their local stores may be out of date - if !reflect.DeepEqual(mcop.Status.Conditions, newConditions) { - klog.V(4).Infof("%v", newConditions) - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{}) - if err != nil { - return err - } - mcop.Status.Conditions = newConditions - _, err = ctrl.mcopClient.OperatorV1().MachineConfigurations().UpdateStatus(context.TODO(), mcop, metav1.UpdateOptions{}) - if err != nil { - return err - } - return nil - }); err != nil { - klog.Errorf("error updating MachineConfiguration status: %v", err) + klog.V(2).Infof("MachineConfiguration status update: %v", mcopStatus) + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{}) + if err != nil { + return err + } + mcop.Status = mcopStatus + _, err = ctrl.mcopClient.OperatorV1().MachineConfigurations().UpdateStatus(context.TODO(), mcop, metav1.UpdateOptions{}) + if err != nil { + return err } + return nil + }); err != nil { + klog.Errorf("error updating MachineConfiguration status: %v", err) } + } // getDefaultConditions returns the default boot image update conditions when no diff --git a/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller_test.go b/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller_test.go index 69e8fd9fd7..d2a9e8d8bd 100644 --- a/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller_test.go +++ b/pkg/controller/machine-set-boot-image/machine_set_boot_image_controller_test.go @@ -740,7 +740,7 @@ func TestReconcileAzureProviderSpec(t *testing.T) { testStreamData = tt.streamData } - patchRequired, updatedProviderSpec, err := reconcileAzureProviderSpec( + patchRequired, _, updatedProviderSpec, err := reconcileAzureProviderSpec( testStreamData, tt.arch, infra, diff --git a/pkg/controller/machine-set-boot-image/ms_helpers.go b/pkg/controller/machine-set-boot-image/ms_helpers.go index f87d0adbd6..f825fd314c 100644 --- a/pkg/controller/machine-set-boot-image/ms_helpers.go +++ b/pkg/controller/machine-set-boot-image/ms_helpers.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/openshift/api/features" machinev1beta1 "github.com/openshift/api/machine/v1beta1" opv1 "github.com/openshift/api/operator/v1" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" @@ -95,7 +96,7 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) { ctrl.updateConditions(reason, nil, opv1.MachineConfigurationBootImageUpdateProgressing) for _, machineSet := range mapiMachineSets { - err := ctrl.syncMAPIMachineSet(machineSet) + patchSkipped, err := ctrl.syncMAPIMachineSet(machineSet) if err == nil { ctrl.mapiStats.inProgress++ } else { @@ -103,15 +104,24 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) { syncErrors = append(syncErrors, fmt.Errorf("error syncing MAPI MachineSet %s: %v", machineSet.Name, err)) ctrl.mapiStats.erroredCount++ } + if patchSkipped { + ctrl.mapiStats.skippedCount++ + } // Update progressing conditions every step of the loop ctrl.updateConditions(reason, nil, opv1.MachineConfigurationBootImageUpdateProgressing) } // Update/Clear degrade conditions based on errors from this loop ctrl.updateConditions(reason, kubeErrs.NewAggregate(syncErrors), opv1.MachineConfigurationBootImageUpdateDegraded) + // If no machinesets were skipped, update the cluster boot image record + if ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) { + if ctrl.mapiStats.skippedCount == 0 { + ctrl.updateClusterBootImage() + } + } } // syncMAPIMachineSet will attempt to reconcile the provided machineset -func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet) error { +func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet) (bool, error) { startTime := time.Now() klog.V(4).Infof("Started syncing MAPI machineset %q (%v)", machineSet.Name, startTime) @@ -123,26 +133,26 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet // that the machineset may be managed by another workflow and should not be reconciled. if len(machineSet.GetOwnerReferences()) != 0 { klog.Infof("machineset %s has OwnerReference: %v, skipping boot image update", machineSet.GetOwnerReferences()[0].Kind+"/"+machineSet.GetOwnerReferences()[0].Name, machineSet.Name) - return nil + return true, nil } if os, ok := machineSet.Spec.Template.Labels[OSLabelKey]; ok { if os == "Windows" { klog.Infof("machineset %s has a windows os label, skipping boot image update", machineSet.Name) - return nil + return false, nil } } // Fetch the architecture type of this machineset arch, err := getArchFromMachineSet(machineSet) if err != nil { - return fmt.Errorf("failed to fetch arch during machineset sync: %w", err) + return false, fmt.Errorf("failed to fetch arch during machineset sync: %w", err) } // Fetch the infra object to determine the platform type infra, err := ctrl.infraLister.Get("cluster") if err != nil { - return fmt.Errorf("failed to fetch infra object during machineset sync: %w", err) + return false, fmt.Errorf("failed to fetch infra object during machineset sync: %w", err) } // Fetch the bootimage configmap & ensure it has been stamped by the operator. This is done by @@ -151,44 +161,44 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet // If it hasn't been updated, exit and wait for a resync. configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName) if err != nil { - return fmt.Errorf("failed to fetch coreos-bootimages config map during machineset sync: %w", err) + return false, fmt.Errorf("failed to fetch coreos-bootimages config map during machineset sync: %w", err) } versionHashFromCM, versionHashFound := configMap.Data[ctrlcommon.MCOVersionHashKey] if !versionHashFound { klog.Infof("failed to find mco version hash in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName) - return nil + return true, nil } if versionHashFromCM != operatorversion.Hash { klog.Infof("mismatch between MCO hash version stored in configmap and current MCO version; sync will exit to wait for the MCO upgrade to complete") - return nil + return true, nil } releaseVersionFromCM, releaseVersionFound := configMap.Data[ctrlcommon.OCPReleaseVersionKey] if !releaseVersionFound { klog.Infof("failed to find OCP release version in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName) - return nil + return true, nil } if releaseVersionFromCM != operatorversion.ReleaseVersion { klog.Infof("mismatch between OCP release version stored in configmap and current MCO release version; sync will exit to wait for the MCO upgrade to complete") - return nil + return true, nil } // Check if the this MachineSet requires an update - patchRequired, newMachineSet, err := checkMachineSet(infra, machineSet, configMap, arch, ctrl.kubeClient) + patchRequired, patchSkipped, newMachineSet, err := checkMachineSet(infra, machineSet, configMap, arch, ctrl.kubeClient) if err != nil { - return fmt.Errorf("failed to reconcile machineset %s, err: %w", machineSet.Name, err) + return false, fmt.Errorf("failed to reconcile machineset %s, err: %w", machineSet.Name, err) } // Patch the machineset if required if patchRequired { // First, check if we're hot looping if ctrl.checkMAPIMachineSetHotLoop(newMachineSet) { - return fmt.Errorf("refusing to reconcile machineset %s, hot loop detected. Please opt-out of boot image updates, adjust your machine provisioning workflow to prevent hot loops and opt back in to resume boot image updates", machineSet.Name) + return false, fmt.Errorf("refusing to reconcile machineset %s, hot loop detected. Please opt-out of boot image updates, adjust your machine provisioning workflow to prevent hot loops and opt back in to resume boot image updates", machineSet.Name) } klog.Infof("Patching MAPI machineset %s", machineSet.Name) - return ctrl.patchMachineSet(machineSet, newMachineSet) + return false, ctrl.patchMachineSet(machineSet, newMachineSet) } klog.Infof("No patching required for MAPI machineset %s", machineSet.Name) - return nil + return patchSkipped, nil } // Checks against a local store of boot image updates to detect hot looping diff --git a/pkg/controller/machine-set-boot-image/platform_helpers.go b/pkg/controller/machine-set-boot-image/platform_helpers.go index cea8277232..326f1e6092 100644 --- a/pkg/controller/machine-set-boot-image/platform_helpers.go +++ b/pkg/controller/machine-set-boot-image/platform_helpers.go @@ -45,7 +45,7 @@ type publisherOffer struct { // This function calls the appropriate reconcile function based on the infra type // On success, it will return a bool indicating if a patch is required, and an updated // machineset object if any. It will return an error if any of the above steps fail. -func checkMachineSet(infra *osconfigv1.Infrastructure, machineSet *machinev1beta1.MachineSet, configMap *corev1.ConfigMap, arch string, secretClient clientset.Interface) (bool, *machinev1beta1.MachineSet, error) { +func checkMachineSet(infra *osconfigv1.Infrastructure, machineSet *machinev1beta1.MachineSet, configMap *corev1.ConfigMap, arch string, secretClient clientset.Interface) (bool, bool, *machinev1beta1.MachineSet, error) { switch infra.Status.PlatformStatus.Type { case osconfigv1.AWSPlatformType: return reconcilePlatform(machineSet, infra, configMap, arch, secretClient, reconcileAWSProviderSpec) @@ -57,7 +57,7 @@ func checkMachineSet(infra *osconfigv1.Infrastructure, machineSet *machinev1beta return reconcilePlatform(machineSet, infra, configMap, arch, secretClient, reconcileVSphereProviderSpec) default: klog.Infof("Skipping machineset %s, unsupported platform %s", machineSet.Name, infra.Status.PlatformStatus.Type) - return false, nil, nil + return false, false, nil, nil } } @@ -69,44 +69,44 @@ func reconcilePlatform[T any]( configMap *corev1.ConfigMap, arch string, secretClient clientset.Interface, - reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, *T, error), -) (patchRequired bool, newMachineSet *machinev1beta1.MachineSet, err error) { + reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, bool, *T, error), +) (patchRequired, patchSkipped bool, newMachineSet *machinev1beta1.MachineSet, err error) { klog.Infof("Reconciling MAPI machineset %s on %s, with arch %s", machineSet.Name, string(infra.Status.PlatformStatus.Type), arch) // Unmarshal the provider spec providerSpec := new(T) if err := unmarshalProviderSpec(machineSet, providerSpec); err != nil { - return false, nil, err + return false, false, nil, err } // Unmarshal the configmap into a stream object streamData := new(stream.Stream) if err := unmarshalStreamDataConfigMap(configMap, streamData); err != nil { - return false, nil, err + return false, false, nil, err } // Reconcile the provider spec - patchRequired, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, machineSet.Name, secretClient) + patchRequired, patchSkipped, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, machineSet.Name, secretClient) if err != nil { - return false, nil, err + return false, false, nil, err } // If no patch is required, exit early if !patchRequired { - return false, nil, nil + return false, patchSkipped, nil, nil } // If patch is required, marshal the new providerspec into the machineset newMachineSet = machineSet.DeepCopy() if err := marshalProviderSpec(newMachineSet, newProviderSpec); err != nil { - return false, nil, err + return false, false, nil, err } - return patchRequired, newMachineSet, nil + return patchRequired, false, newMachineSet, nil } // reconcileGCPProviderSpec reconciles the GCP provider spec by updating boot images // Returns whether a patch is required, the updated provider spec, and any error -func reconcileGCPProviderSpec(streamData *stream.Stream, arch string, _ *osconfigv1.Infrastructure, providerSpec *machinev1beta1.GCPMachineProviderSpec, machineSetName string, secretClient clientset.Interface) (bool, *machinev1beta1.GCPMachineProviderSpec, error) { +func reconcileGCPProviderSpec(streamData *stream.Stream, arch string, _ *osconfigv1.Infrastructure, providerSpec *machinev1beta1.GCPMachineProviderSpec, machineSetName string, secretClient clientset.Interface) (bool, bool, *machinev1beta1.GCPMachineProviderSpec, error) { // Construct the new target bootimage from the configmap // This formatting is based on how the installer constructs @@ -131,7 +131,7 @@ func reconcileGCPProviderSpec(streamData *stream.Stream, arch string, _ *osconfi // If image does not start with "projects/rhcos-cloud/global/images", this is a custom boot image. if !strings.HasPrefix(disk.Image, "projects/rhcos-cloud/global/images") { klog.Infof("current boot image %s is unknown, skipping update of MachineSet %s", disk.Image, machineSetName) - return false, nil, nil + return false, true, nil, nil } patchRequired = true newProviderSpec.Disks[idx].Image = newBootImage @@ -140,16 +140,16 @@ func reconcileGCPProviderSpec(streamData *stream.Stream, arch string, _ *osconfi if patchRequired { // Ensure the ignition stub is the minimum acceptable spec required for boot image updates if err := upgradeStubIgnitionIfRequired(providerSpec.UserDataSecret.Name, secretClient); err != nil { - return false, nil, err + return false, false, nil, err } } - return patchRequired, newProviderSpec, nil + return patchRequired, false, newProviderSpec, nil } // reconcileAWSProviderSpec reconciles the AWS provider spec by updating AMIs // Returns whether a patch is required, the updated provider spec, and any error -func reconcileAWSProviderSpec(streamData *stream.Stream, arch string, _ *osconfigv1.Infrastructure, providerSpec *machinev1beta1.AWSMachineProviderConfig, machineSetName string, secretClient clientset.Interface) (bool, *machinev1beta1.AWSMachineProviderConfig, error) { +func reconcileAWSProviderSpec(streamData *stream.Stream, arch string, _ *osconfigv1.Infrastructure, providerSpec *machinev1beta1.AWSMachineProviderConfig, machineSetName string, secretClient clientset.Interface) (bool, bool, *machinev1beta1.AWSMachineProviderConfig, error) { // Extract the region from the Placement field region := providerSpec.Placement.Region @@ -159,7 +159,7 @@ func reconcileAWSProviderSpec(streamData *stream.Stream, arch string, _ *osconfi if err != nil { // On a region not found error, log and skip this MachineSet klog.Infof("failed to get AMI for region %s: %v, skipping update of MachineSet %s", region, err, machineSetName) - return false, nil, nil + return false, true, nil, nil } newAMI := awsRegionImage.Image @@ -172,20 +172,20 @@ func reconcileAWSProviderSpec(streamData *stream.Stream, arch string, _ *osconfi // Related bug: https://issues.redhat.com/browse/OCPBUGS-57506 if newProviderSpec.AMI.ID == nil { klog.Infof("current AMI.ID is undefined, skipping update of MachineSet %s", machineSetName) - return false, nil, nil + return false, true, nil, nil } currentAMI := *newProviderSpec.AMI.ID // If the current AMI matches target AMI, nothing to do here if newAMI == currentAMI { - return false, nil, nil + return false, false, nil, nil } // Validate that we're allowed to update from the current AMI if !AllowedAMIs.Has(currentAMI) { klog.Infof("current AMI %s is unknown, skipping update of MachineSet %s", currentAMI, machineSetName) - return false, nil, nil + return false, true, nil, nil } klog.Infof("Current image: %s: %s", region, currentAMI) @@ -199,27 +199,27 @@ func reconcileAWSProviderSpec(streamData *stream.Stream, arch string, _ *osconfi // Ensure the ignition stub is the minimum acceptable spec required for boot image updates if err := upgradeStubIgnitionIfRequired(providerSpec.UserDataSecret.Name, secretClient); err != nil { - return false, nil, err + return false, false, nil, err } - return true, newProviderSpec, nil + return true, false, newProviderSpec, nil } -func reconcileVSphereProviderSpec(streamData *stream.Stream, arch string, infra *osconfigv1.Infrastructure, providerSpec *machinev1beta1.VSphereMachineProviderSpec, _ string, secretClient clientset.Interface) (bool, *machinev1beta1.VSphereMachineProviderSpec, error) { +func reconcileVSphereProviderSpec(streamData *stream.Stream, arch string, infra *osconfigv1.Infrastructure, providerSpec *machinev1beta1.VSphereMachineProviderSpec, _ string, secretClient clientset.Interface) (bool, bool, *machinev1beta1.VSphereMachineProviderSpec, error) { if infra.Spec.PlatformSpec.VSphere == nil { klog.Warningf("Reconcile skipped: VSphere field is nil in PlatformSpec %v", infra.Spec.PlatformSpec) - return false, nil, nil + return false, false, nil, nil } streamArch, err := streamData.GetArchitecture(arch) if err != nil { - return false, nil, err + return false, false, nil, err } artifacts := streamArch.Artifacts["vmware"] if artifacts.Release == "" { - return false, nil, fmt.Errorf("%s: artifact '%s' not found", streamData.FormatPrefix(arch), "vmware") + return false, false, nil, fmt.Errorf("%s: artifact '%s' not found", streamData.FormatPrefix(arch), "vmware") } newProviderSpec := providerSpec.DeepCopy() @@ -227,38 +227,38 @@ func reconcileVSphereProviderSpec(streamData *stream.Stream, arch string, infra // Fetch the creds configmap credsSc, err := secretClient.CoreV1().Secrets("kube-system").Get(context.TODO(), "vsphere-creds", metav1.GetOptions{}) if err != nil { - return false, nil, fmt.Errorf("failed to fetch vsphere-creds Secret during machineset sync: %w", err) + return false, false, nil, fmt.Errorf("failed to fetch vsphere-creds Secret during machineset sync: %w", err) } newBootImg, patchRequired, err := createNewVMTemplate(streamData, providerSpec, infra, credsSc, arch, artifacts.Release) if err != nil { - return false, nil, err + return false, false, nil, err } // If patch is required, marshal the new providerspec into the machineset if patchRequired { // Ensure the ignition stub is the minimum acceptable spec required for boot image updates if err := upgradeStubIgnitionIfRequired(providerSpec.UserDataSecret.Name, secretClient); err != nil { - return false, nil, err + return false, false, nil, err } newProviderSpec.Template = newBootImg } - return patchRequired, newProviderSpec, nil + return patchRequired, false, newProviderSpec, nil } // reconcileAzureProviderSpec reconciles the Azure provider spec by updating AMIs // Returns whether a patch is required, the updated provider spec, and any error -func reconcileAzureProviderSpec(streamData *stream.Stream, arch string, _ *osconfigv1.Infrastructure, providerSpec *machinev1beta1.AzureMachineProviderSpec, machineSetName string, secretClient clientset.Interface) (bool, *machinev1beta1.AzureMachineProviderSpec, error) { +func reconcileAzureProviderSpec(streamData *stream.Stream, arch string, _ *osconfigv1.Infrastructure, providerSpec *machinev1beta1.AzureMachineProviderSpec, machineSetName string, secretClient clientset.Interface) (bool, bool, *machinev1beta1.AzureMachineProviderSpec, error) { if arch == "ppc64le" || arch == "s390x" { klog.Infof("Skipping update for %s, machinesets/controlplanemachinesets with arch %s are not supported for Azure", machineSetName, arch) - return false, nil, nil + return false, false, nil, nil } if providerSpec.SecurityProfile != nil && providerSpec.SecurityProfile.Settings.SecurityType != "" { klog.Infof("Skipping update for %s, machinesets/controlplanemachinesets with a SecurityType defined(%s in this case) is not currently supported for Azure", machineSetName, providerSpec.SecurityProfile.Settings.SecurityType) - return false, nil, nil + return false, false, nil, nil } currentImage := providerSpec.Image @@ -272,24 +272,24 @@ func reconcileAzureProviderSpec(streamData *stream.Stream, arch string, _ *oscon // Determine the target image stream variant azureVariant, err := determineAzureVariant(usesLegacyImageUpload, currentImage) if err != nil { - return false, nil, err + return false, false, nil, err } // Extract RHCOS stream for the architecture of this machineSet streamArch, err := streamData.GetArchitecture(arch) if err != nil { - return false, nil, err + return false, false, nil, err } // Sanity check: On OKD clusters marketplace streams are not available, so they should be skipped for updates. // TODO: Determine if OKD azure clusters are even in use/tested if streamArch.RHELCoreOSExtensions.Marketplace == nil { klog.Infof("Skipping machineset %s, marketplace streams are not available", machineSetName) - return false, nil, nil + return false, true, nil, nil } if streamArch.RHELCoreOSExtensions.Marketplace.Azure == nil { klog.Infof("Skipping machineset %s, Azure marketplace streams are not available", machineSetName) - return false, nil, nil + return false, true, nil, nil } // There are two types to consider: hyperGenV1 & hyperGenV2. This determination @@ -317,13 +317,13 @@ func reconcileAzureProviderSpec(streamData *stream.Stream, arch string, _ *oscon // Determine target image from RHCOS stream targetImage, err := getTargetImageFromStream(streamArch, azureVariant, usesHyperVGen2, arch) if err != nil { - return false, nil, err + return false, false, nil, err } // If the current image matches, nothing to do here // Q: Should we enhance this to do version comparisons? if reflect.DeepEqual(currentImage, targetImage) { - return false, nil, nil + return false, false, nil, nil } klog.Infof("Current boot image version: %s", currentImage.Version) @@ -335,10 +335,10 @@ func reconcileAzureProviderSpec(streamData *stream.Stream, arch string, _ *oscon // Ensure the ignition stub is the minimum acceptable spec required for boot image updates if err := upgradeStubIgnitionIfRequired(providerSpec.UserDataSecret.Name, secretClient); err != nil { - return false, nil, err + return false, false, nil, err } - return true, newProviderSpec, nil + return true, false, newProviderSpec, nil } // getAzureImageFromStreamImage converts a stream marketplace image to an Azure machine image diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index da0e9cacb2..3c23929813 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -83,6 +83,7 @@ type Operator struct { syncHandler func(ic string) error imgLister configlistersv1.ImageLister + clusterVersionLister configlistersv1.ClusterVersionLister crdLister apiextlistersv1.CustomResourceDefinitionLister mcpLister mcfglistersv1.MachineConfigPoolLister msLister mcfglistersv1.MachineConfigNodeLister @@ -130,6 +131,7 @@ type Operator struct { dnsListerSynced cache.InformerSynced maoSecretInformerSynced cache.InformerSynced imgListerSynced cache.InformerSynced + clusterVersionListerSynced cache.InformerSynced mcoSAListerSynced cache.InformerSynced mcoSecretListerSynced cache.InformerSynced ocCmListerSynced cache.InformerSynced @@ -181,6 +183,7 @@ func New( nodeInformer coreinformersv1.NodeInformer, maoSecretInformer coreinformersv1.SecretInformer, imgInformer configinformersv1.ImageInformer, + clusterVersionInformer configinformersv1.ClusterVersionInformer, mcoSAInformer coreinformersv1.ServiceAccountInformer, mcoSecretInformer coreinformersv1.SecretInformer, ocCmInformer coreinformersv1.ConfigMapInformer, @@ -254,6 +257,7 @@ func New( dnsInformer.Informer(), maoSecretInformer.Informer(), imgInformer.Informer(), + clusterVersionInformer.Informer(), mcoSAInformer.Informer(), mcoSecretInformer.Informer(), ocSecretInformer.Informer(), @@ -274,6 +278,7 @@ func New( optr.syncHandler = optr.sync optr.imgLister = imgInformer.Lister() + optr.clusterVersionLister = clusterVersionInformer.Lister() optr.clusterCmLister = clusterCmInfomer.Lister() optr.clusterCmListerSynced = clusterCmInfomer.Informer().HasSynced optr.mcpLister = mcpInformer.Lister() @@ -292,6 +297,7 @@ func New( optr.nodeClusterListerSynced = nodeClusterInformer.Informer().HasSynced optr.imgListerSynced = imgInformer.Informer().HasSynced + optr.clusterVersionListerSynced = clusterVersionInformer.Informer().HasSynced optr.maoSecretInformerSynced = maoSecretInformer.Informer().HasSynced optr.serviceAccountInformerSynced = serviceAccountInfomer.Informer().HasSynced optr.clusterRoleInformerSynced = clusterRoleInformer.Informer().HasSynced @@ -374,6 +380,7 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) { optr.mcListerSynced, optr.dnsListerSynced, optr.imgListerSynced, + optr.clusterVersionListerSynced, optr.mcoSAListerSynced, optr.mcoSecretListerSynced, optr.ocCmListerSynced, diff --git a/pkg/operator/status.go b/pkg/operator/status.go index c41fd949cb..6084410179 100644 --- a/pkg/operator/status.go +++ b/pkg/operator/status.go @@ -13,6 +13,7 @@ import ( configv1 "github.com/openshift/api/config/v1" features "github.com/openshift/api/features" + opv1 "github.com/openshift/api/operator/v1" cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -20,6 +21,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + k8sversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/klog/v2" "k8s.io/utils/clock" @@ -271,6 +273,18 @@ func (optr *Operator) syncUpgradeableStatus(co *configv1.ClusterOperator) error Reason: asExpectedReason, } + // Check boot image skew upgradeable guards + skewErrorExists, skewErrorMessage, err := optr.checkBootImageSkewUpgradeableGuard() + if err != nil { + return err + } + + if skewErrorExists { + coStatusCondition.Status = configv1.ConditionFalse + coStatusCondition.Reason = "ClusterBootImageSkewError" + coStatusCondition.Message = skewErrorMessage + } + var degraded, interrupted bool for _, pool := range pools { interrupted = isPoolStatusConditionTrue(pool, mcfgv1.MachineConfigPoolBuildInterrupted) @@ -631,3 +645,170 @@ func machineConfigPoolStatus(fgHandler ctrlcommon.FeatureGatesHandler, pool *mcf func taskFailed(task string) string { return task + "Failed" } + +// checkBootImageSkewUpgradeableGuard checks if the boot image version is within acceptable limits. +// It returns an error if there is no skew enforcement opinion specified. If one is specified, +// it checks if boot image skew is within the expected limit. +func (optr *Operator) checkBootImageSkewUpgradeableGuard() (bool, string, error) { + // Check if feature gate is enabled + if !optr.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) { + return false, "", nil + } + + // Fetch MachineConfiguration + mcop, err := optr.mcopLister.Get(ctrlcommon.MCOOperatorKnobsObjectName) + if err != nil { + if apierrors.IsNotFound(err) { + klog.V(4).Infof("MachineConfiguration not found, skipping boot image skew enforcement") + return false, "", nil + } + return false, "", fmt.Errorf("failed to get MachineConfiguration: %w", err) + } + + // Perform boot image skew enforcement based on mode + skewLimitExceeded := false + skewLimitExceededMessage := "" + + switch mcop.Status.BootImageSkewEnforcementStatus.Mode { + case opv1.BootImageSkewEnforcementModeStatusAutomatic: + skewLimitExceeded, skewLimitExceededMessage = checkBootImageSkew( + mcop.Status.BootImageSkewEnforcementStatus.Automatic.OCPVersion, + mcop.Status.BootImageSkewEnforcementStatus.Automatic.RHCOSVersion, + ) + case opv1.BootImageSkewEnforcementModeStatusManual: + skewLimitExceeded, skewLimitExceededMessage = checkBootImageSkew( + mcop.Status.BootImageSkewEnforcementStatus.Manual.OCPVersion, + mcop.Status.BootImageSkewEnforcementStatus.Manual.RHCOSVersion, + ) + case opv1.BootImageSkewEnforcementModeStatusNone: + // TODO: Set a low level prom alert to set scaling risk + klog.V(4).Infof("evaluating boot image skew enforcement: mode set to None") + return false, "", nil + default: + // Sanity check, this should only be possible if status hasn't been populated yet. + return false, "", nil + } + + if skewLimitExceeded { + return true, fmt.Sprintf("Upgrades have been disabled because %s. To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", skewLimitExceededMessage), nil + } + + return false, "", nil +} + +// checkBootImageSkew determines if the cluster's boot images are within acceptable version skew. +// It compares the oldest boot image version (currentOCPVersion, currentRHCOSVersion) against the minimum +// supported version. +// Returns true if the boot image version is older than the minimum, along with an error message. +func checkBootImageSkew(currentOCPVersion, currentRHCOSVersion string) (bool, string) { + + if currentOCPVersion != "" { + return checkOCPVersionSkew(currentOCPVersion) + } + + if currentRHCOSVersion != "" { + return checkRHCOSVersionSkew(currentRHCOSVersion) + } + + // This isn't possible due to API validations; more of a sanity check for safety + klog.Warningf("no boot image versions provided, skipping skew check") + return false, "" +} + +// checkOCPVersionSkew compares a version string against the minimum supported version. +// Returns true if the version is below the minimum, along with an error message. +func checkOCPVersionSkew(version string) (bool, string) { + // Parse the boot image version + bootImageVersion, err := k8sversion.ParseGeneric(version) + if err != nil { + klog.Warningf("Failed to parse boot image version %q: %v", version, err) + return false, "" + } + + // Parse the minimum supported version + minSupportedVersion, err := k8sversion.ParseGeneric(ctrlcommon.OCPVersionBootImageSkewLimit) + if err != nil { + klog.Errorf("Failed to parse OCPVersionBootImageSkewLimit constant %q: %v", ctrlcommon.OCPVersionBootImageSkewLimit, err) + return false, "" + } + + // Check if boot image version is less than the minimum supported version + if bootImageVersion.LessThan(minSupportedVersion) { + return true, fmt.Sprintf("the cluster is using OCP boot image version %s, which is below the minimum required version %s", + version, ctrlcommon.OCPVersionBootImageSkewLimit) + } + + klog.V(4).Infof("Boot image version %s meets minimum version requirement (>= %s)", + version, ctrlcommon.OCPVersionBootImageSkewLimit) + return false, "" +} + +// checkRHCOSVersionSkew compares an RHCOS version string against the minimum supported version. +// Returns true if the version is below the minimum, along with an error message. +// +// Note: RHCOS versions can either have formatting of [major].[minor].[datestamp(YYYYMMDD)]-[buildnumber] (example:9.6.20251023-0) or the legacy +// format of [major].[minor].[timestamp(YYYYMMDDHHmm)]-[buildnumber] (example: 48.84.202208021106-0). In the modern(or RHEL) formatting, we just +// need to compare [major.minor] against the RHCOS skew limit. In the legacy format, the minor version includes the whole RHEL major/minor +// and only that bit should be used to compare against the RHCOS skew limit. +func checkRHCOSVersionSkew(version string) (bool, string) { + // Split version to extract components + parts := strings.Split(version, ".") + if len(parts) < 3 { + klog.Warningf("Failed to parse RHCOS version %q: expected at least 3 parts", version) + return false, "" + } + + major := parts[0] + minor := parts[1] + + // Extract timestamp (remove build number suffix if present) + timestampPart := parts[2] + if idx := strings.Index(timestampPart, "-"); idx != -1 { + timestampPart = timestampPart[:idx] + } + + var versionToCompare string + + // Determine format based on timestamp length + switch len(timestampPart) { + case 8: + // Modern format (YYYYMMDD): compare major.minor directly + versionToCompare = fmt.Sprintf("%s.%s", major, minor) + case 12: + // Legacy format (YYYYMMDDHHmm): minor contains RHEL version (e.g., 84 = RHEL 8.4, 810 = RHEL 8.10) + // First digit is RHEL major, remaining digits are RHEL minor. + if len(minor) >= 2 { + versionToCompare = fmt.Sprintf("%s.%s", minor[:1], minor[1:]) + } else { + klog.Warningf("Failed to parse RHCOS legacy version %q: minor version too short", version) + return false, "" + } + default: + klog.Warningf("Failed to parse RHCOS version %q: unexpected timestamp format (length %d)", version, len(timestampPart)) + return false, "" + } + + // Parse the version to compare + bootImageVersion, err := k8sversion.ParseGeneric(versionToCompare) + if err != nil { + klog.Warningf("Failed to parse RHCOS version %q (extracted %q): %v", version, versionToCompare, err) + return false, "" + } + + // Parse the minimum supported version + minSupportedVersion, err := k8sversion.ParseGeneric(ctrlcommon.RHCOSVersionBootImageSkewLimit) + if err != nil { + klog.Errorf("Failed to parse RHCOSVersionBootImageSkewLimit constant %q: %v", ctrlcommon.RHCOSVersionBootImageSkewLimit, err) + return false, "" + } + + // Check if boot image version is less than the minimum supported version + if bootImageVersion.LessThan(minSupportedVersion) { + return true, fmt.Sprintf("the cluster is using RHCOS boot image version %s(RHEL version: %s), which is below the minimum required RHEL version %s", + version, versionToCompare, ctrlcommon.RHCOSVersionBootImageSkewLimit) + } + + klog.V(4).Infof("RHCOS boot image version %s meets minimum version requirement (>= %s)", + version, ctrlcommon.RHCOSVersionBootImageSkewLimit) + return false, "" +} diff --git a/pkg/operator/status_test.go b/pkg/operator/status_test.go index f3976a5554..0e65a54714 100644 --- a/pkg/operator/status_test.go +++ b/pkg/operator/status_test.go @@ -23,8 +23,10 @@ import ( configv1 "github.com/openshift/api/config/v1" features "github.com/openshift/api/features" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + opv1 "github.com/openshift/api/operator/v1" fakeconfigclientset "github.com/openshift/client-go/config/clientset/versioned/fake" configlistersv1 "github.com/openshift/client-go/config/listers/config/v1" + mcoplistersv1 "github.com/openshift/client-go/operator/listers/operator/v1" cov1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" "github.com/openshift/machine-config-operator/test/helpers" @@ -795,3 +797,378 @@ func TestInClusterBringUpStayOnErr(t *testing.T) { assert.False(t, optr.inClusterBringup) } + +func TestCheckBootImageSkewUpgradeableGuard(t *testing.T) { + tests := []struct { + name string + featureGateEnabled bool + mcop *opv1.MachineConfiguration + mcopNotFound bool + mcopGetError error + expectUpgradeBlock bool + expectMessage string + expectError bool + }{ + { + name: "feature gate disabled", + featureGateEnabled: false, + mcop: nil, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "MachineConfiguration not found", + featureGateEnabled: true, + mcopNotFound: true, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is None", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusNone, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is unset", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: "", + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + // OCP version tests in automatic mode + { + name: "mode is Automatic with OCP version within limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + OCPVersion: "4.17.0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is Automatic with OCP version below limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + OCPVersion: "4.12.0", + }, + }, + }, + }, + expectUpgradeBlock: true, + expectMessage: "Upgrades have been disabled because the cluster is using OCP boot image version 4.12.0, which is below the minimum required version " + ctrlcommon.OCPVersionBootImageSkewLimit + ". To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", + expectError: false, + }, + // OCP version tests in manual mode + { + name: "mode is Manual with OCP version within limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeOCPVersion, + OCPVersion: "4.16.0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is Manual with OCP version below limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeOCPVersion, + OCPVersion: "4.10.0", + }, + }, + }, + }, + expectUpgradeBlock: true, + expectMessage: "Upgrades have been disabled because the cluster is using OCP boot image version 4.10.0, which is below the minimum required version " + ctrlcommon.OCPVersionBootImageSkewLimit + ". To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", + expectError: false, + }, + { + name: "mode is Automatic with exact minimum OCP version", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + OCPVersion: ctrlcommon.OCPVersionBootImageSkewLimit, + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + // RHCOS version tests in Automatic mode + { + name: "mode is Automatic with modern RHCOS version within limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + RHCOSVersion: "9.4.20251023-0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is Automatic with modern RHCOS version below limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + RHCOSVersion: "9.0.20251023-0", + }, + }, + }, + }, + expectUpgradeBlock: true, + expectMessage: "Upgrades have been disabled because the cluster is using RHCOS boot image version 9.0.20251023-0(RHEL version: 9.0), which is below the minimum required RHEL version " + ctrlcommon.RHCOSVersionBootImageSkewLimit + ". To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", + expectError: false, + }, + { + name: "mode is Automatic with legacy RHCOS version within limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + RHCOSVersion: "416.94.202510081640-0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is Automatic with legacy RHCOS version below limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + RHCOSVersion: "411.86.202308081056-0", + }, + }, + }, + }, + expectUpgradeBlock: true, + expectMessage: "Upgrades have been disabled because the cluster is using RHCOS boot image version 411.86.202308081056-0(RHEL version: 8.6), which is below the minimum required RHEL version " + ctrlcommon.RHCOSVersionBootImageSkewLimit + ". To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", + expectError: false, + }, + + { + name: "mode is Automatic with exact minimum RHCOS version", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusAutomatic, + Automatic: opv1.ClusterBootImageAutomatic{ + RHCOSVersion: "413.92.202402131523-0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + // RHCOS version tests in manual mode + { + name: "mode is Manual with modern RHCOS version within limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeRHCOSVersion, + RHCOSVersion: "9.6.20251023-0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is Manual with modern RHCOS version below limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeRHCOSVersion, + RHCOSVersion: "9.1.20251023-0", + }, + }, + }, + }, + expectUpgradeBlock: true, + expectMessage: "Upgrades have been disabled because the cluster is using RHCOS boot image version 9.1.20251023-0(RHEL version: 9.1), which is below the minimum required RHEL version " + ctrlcommon.RHCOSVersionBootImageSkewLimit + ". To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", + expectError: false, + }, + { + name: "mode is Manual with legacy RHCOS version within limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeRHCOSVersion, + RHCOSVersion: "416.94.202510081640-0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + { + name: "mode is Manual with legacy RHCOS version below limit", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeRHCOSVersion, + RHCOSVersion: "411.86.202308081056-0", + }, + }, + }, + }, + expectUpgradeBlock: true, + expectMessage: "Upgrades have been disabled because the cluster is using RHCOS boot image version 411.86.202308081056-0(RHEL version: 8.6), which is below the minimum required RHEL version " + ctrlcommon.RHCOSVersionBootImageSkewLimit + ". To enable upgrades, please update your boot images following the documentation at [TODO: insert link], or disable boot image skew enforcement at [TODO: insert link]", + expectError: false, + }, + { + name: "mode is manual with exact minimum RHCOS version", + featureGateEnabled: true, + mcop: &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: ctrlcommon.MCOOperatorKnobsObjectName}, + Status: opv1.MachineConfigurationStatus{ + BootImageSkewEnforcementStatus: opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeRHCOSVersion, + RHCOSVersion: "413.92.202402131523-0", + }, + }, + }, + }, + expectUpgradeBlock: false, + expectMessage: "", + expectError: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // Set up feature gate handler + enabledFeatures := []configv1.FeatureGateName{} + if tc.featureGateEnabled { + enabledFeatures = append(enabledFeatures, features.FeatureGateBootImageSkewEnforcement) + } + fgHandler := ctrlcommon.NewFeatureGatesHardcodedHandler(enabledFeatures, []configv1.FeatureGateName{}) + + // Set up mcopLister + mcopIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + if tc.mcop != nil && !tc.mcopNotFound { + mcopIndexer.Add(tc.mcop) + } + mcopLister := mcoplistersv1.NewMachineConfigurationLister(mcopIndexer) + + optr := &Operator{ + fgHandler: fgHandler, + mcopLister: mcopLister, + } + + upgradeBlock, message, err := optr.checkBootImageSkewUpgradeableGuard() + + if tc.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + assert.Equal(t, tc.expectUpgradeBlock, upgradeBlock, "upgrade block mismatch") + assert.Equal(t, tc.expectMessage, message, "message mismatch") + }) + } +} diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 5cb26773d5..663faea112 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -29,6 +29,7 @@ import ( kubeErrs "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/jsonmergepatch" "k8s.io/apimachinery/pkg/util/sets" + k8sversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/retry" @@ -2276,9 +2277,10 @@ func (optr *Operator) syncMachineConfiguration(_ *renderConfig, _ *configv1.Clus } defaultOptInEvent := false + supportsBootImageUpdates := ctrlcommon.CheckBootImagePlatform(infra, optr.fgHandler) // Populate the default boot images configuration in the status, if the cluster is on a // boot image updates supported platform - if ctrlcommon.CheckBootImagePlatform(infra, optr.fgHandler) { + if supportsBootImageUpdates { // Mirror the spec if it is defined, i.e. admin has an opinion if mcop.Spec.ManagedBootImages.MachineManagers != nil { newMachineConfigurationStatus.ManagedBootImagesStatus = *mcop.Spec.ManagedBootImages.DeepCopy() @@ -2301,6 +2303,9 @@ func (optr *Operator) syncMachineConfiguration(_ *renderConfig, _ *configv1.Clus } } + // Update skew enforcement status if needed + optr.syncBootImageSkewEnforcementStatus(mcop, newMachineConfigurationStatus, supportsBootImageUpdates) + newMachineConfigurationStatus.ObservedGeneration = mcop.GetGeneration() // Check if any changes are required in the Status before making the API call. if !reflect.DeepEqual(mcop.Status, *newMachineConfigurationStatus) { @@ -2417,3 +2422,75 @@ func (optr *Operator) syncPreBuiltImageMachineConfigs() error { return nil } + +// syncBootImageSkewEnforcementStatus determines the appropriate BootImageSkewEnforcementStatus based on +// the MachineConfiguration spec, platform defaults, and cluster version information. +func (optr *Operator) syncBootImageSkewEnforcementStatus(mcop *opv1.MachineConfiguration, newMachineConfigurationStatus *opv1.MachineConfigurationStatus, supportsBootImageUpdates bool) { + // React to any changes in boot image skew enforcement configuration + if !optr.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) { + return + } + + // First, estimate an OCPVersion from ClusterVersion. + ocpVersionAtInstall := optr.getOCPVersionFromClusterVersion() + // If BootImageSkewEnforcement spec is defined, reflect that to status + //nolint:gocritic + if mcop.Spec.BootImageSkewEnforcement != (opv1.BootImageSkewEnforcementConfig{}) { + if mcop.Spec.BootImageSkewEnforcement.Mode == opv1.BootImageSkewEnforcementConfigModeManual { + newMachineConfigurationStatus.BootImageSkewEnforcementStatus = opv1.BootImageSkewEnforcementStatus{ + Mode: opv1.BootImageSkewEnforcementModeStatusManual, + Manual: mcop.Spec.BootImageSkewEnforcement.Manual, + } + } else { // only other possible opinion is "None" + newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusNone() + } + } else if supportsBootImageUpdates { + // If an "All" option is specified and BootImageSkewEnforcementStatus is empty or not set to Automatic => set Mode to Automatic. + if reflect.DeepEqual(newMachineConfigurationStatus.ManagedBootImagesStatus, apihelpers.GetManagedBootImagesWithUpdateEnabled()) { + if (newMachineConfigurationStatus.BootImageSkewEnforcementStatus.Mode != opv1.BootImageSkewEnforcementModeStatusAutomatic || + mcop.Status.BootImageSkewEnforcementStatus == opv1.BootImageSkewEnforcementStatus{}) { + newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion(ocpVersionAtInstall) + } + } else { // For any other opinion i.e. admin has overridden boot image opinion to Partial/None => set Mode to Manual. + newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusManualWithOCPVersion(ocpVersionAtInstall) + } + } else { // For platforms that do not support automated boot image updates => set Mode to Manual. + newMachineConfigurationStatus.BootImageSkewEnforcementStatus = apihelpers.GetSkewEnforcementStatusManualWithOCPVersion(ocpVersionAtInstall) + } +} + +// getOCPVersionFromClusterVersion extracts the OCP version from ClusterVersion history. +// It finds the last completed update in history (install version) and parses it to a clean version string. +// Returns an empty string if ClusterVersion cannot be retrieved or parsed. +func (optr *Operator) getOCPVersionFromClusterVersion() string { + clusterVersion, err := optr.clusterVersionLister.Get("version") + if err != nil { + klog.Warningf("Failed to get ClusterVersion: %v, skipping boot image skew enforcement configuration", err) + return "" + } + if len(clusterVersion.Status.History) == 0 { + klog.Warningf("ClusterVersion has no history, skipping boot image skew enforcement configuration") + return "" + } + + // History is ordered by recency (newest first), so find the last completed entry (install version) + var installVersion string + for i := len(clusterVersion.Status.History) - 1; i >= 0; i-- { + if clusterVersion.Status.History[i].State == configv1.CompletedUpdate { + installVersion = clusterVersion.Status.History[i].Version + break + } + } + // ClusterVersion has no completed updates in history(install likely on-going), default to last value + if installVersion == "" { + klog.Warningf("ClusterVersion has no completed updates in history(install likely on-going), default to last value") + installVersion = clusterVersion.Status.History[len(clusterVersion.Status.History)-1].Version + } + // Scrape away CI/nightly tags if needed + parsedVersion, err := k8sversion.ParseGeneric(installVersion) + if err != nil { + klog.Warningf("Failed to parse install version %q: %v, use a placeholder for now", installVersion, err) + return "0.0.0" + } + return fmt.Sprintf("%d.%d.%d", parsedVersion.Major(), parsedVersion.Minor(), parsedVersion.Patch()) +} diff --git a/pkg/operator/sync_test.go b/pkg/operator/sync_test.go index 158f262c38..8c98a63be0 100644 --- a/pkg/operator/sync_test.go +++ b/pkg/operator/sync_test.go @@ -304,66 +304,158 @@ func TestSyncMachineConfiguration(t *testing.T) { name string mcop *opv1.MachineConfiguration infra *configv1.Infrastructure + clusterVersion *configv1.ClusterVersion expectedManagedBootImagesStatus opv1.ManagedBootImages + expectedSkewEnforcementStatus opv1.BootImageSkewEnforcementStatus annotationExpected bool }{ { name: "AWS platform, no existing config, opt-in expected", infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), mcop: buildMachineConfigurationWithNoBootImageConfiguration(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: true, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"), }, { name: "AWS platform, existing enabled config, no opt-in expected", infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), mcop: buildMachineConfigurationWithBootImageUpdateEnabled(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: false, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"), }, { name: "AWS platform, existing disabled config, no opt-in expected", infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), mcop: buildMachineConfigurationWithBootImageUpdateDisabled(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: false, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateDisabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.18.0"), }, { name: "GCP platform, no existing config, opt-in expected", infra: buildInfra(withPlatformType(configv1.GCPPlatformType)), mcop: buildMachineConfigurationWithNoBootImageConfiguration(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: true, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"), }, { name: "GCP platform, existing enabled config, no opt-in expected", infra: buildInfra(withPlatformType(configv1.GCPPlatformType)), mcop: buildMachineConfigurationWithBootImageUpdateEnabled(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: false, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"), }, { name: "GCP platform, existing disabled config, no opt-in expected", infra: buildInfra(withPlatformType(configv1.GCPPlatformType)), mcop: buildMachineConfigurationWithBootImageUpdateDisabled(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: false, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateDisabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.18.0"), }, { name: "Azure platform, no existing config, no opt-in expected", infra: buildInfra(withPlatformType(configv1.AzurePlatformType)), mcop: buildMachineConfigurationWithNoBootImageConfiguration(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: false, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateDisabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.18.0"), }, { name: "vsphere platform, no existing config, no opt-in expected", infra: buildInfra(withPlatformType(configv1.VSpherePlatformType)), mcop: buildMachineConfigurationWithNoBootImageConfiguration(), + clusterVersion: buildClusterVersion("4.18.0"), annotationExpected: false, expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateDisabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.18.0"), + }, + // Skew enforcement test cases + { + name: "AWS platform, boot images enabled, skew enforcement automatic mode expected", + infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), + mcop: buildMachineConfigurationWithBootImageEnabledAndNoSkewEnforcement(), + clusterVersion: buildClusterVersion("4.18.0"), + annotationExpected: false, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"), + }, + { + name: "AWS platform, boot images disabled, skew enforcement manual mode expected", + infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), + mcop: buildMachineConfigurationWithBootImageDisabledAndNoSkewEnforcement(), + clusterVersion: buildClusterVersion("4.17.0"), + annotationExpected: false, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateDisabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.17.0"), + }, + { + name: "AWS platform, spec defines manual mode, status should reflect spec", + infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), + mcop: buildMachineConfigurationWithSkewEnforcementManual("4.16.0"), + clusterVersion: buildClusterVersion("4.18.0"), + annotationExpected: true, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.16.0"), + }, + { + name: "AWS platform, spec defines none mode, status should reflect none", + infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), + mcop: buildMachineConfigurationWithSkewEnforcementNone(), + clusterVersion: buildClusterVersion("4.18.0"), + annotationExpected: true, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusNone(), + }, + { + name: "GCP platform, boot images enabled, skew enforcement automatic mode expected", + infra: buildInfra(withPlatformType(configv1.GCPPlatformType)), + mcop: buildMachineConfigurationWithBootImageEnabledAndNoSkewEnforcement(), + clusterVersion: buildClusterVersion("4.19.1"), + annotationExpected: false, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.19.1"), + }, + { + name: "BareMetal platform (unsupported), skew enforcement manual mode expected", + infra: buildInfra(withPlatformType(configv1.BareMetalPlatformType)), + mcop: buildMachineConfigurationWithNoBootImageConfiguration(), + clusterVersion: buildClusterVersion("4.18.0"), + annotationExpected: false, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithNoConfiguration(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusManualWithOCPVersion("4.18.0"), + }, + { + name: "AWS platform, cluster version with multiple history entries", + infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), + mcop: buildMachineConfigurationWithBootImageEnabledAndNoSkewEnforcement(), + clusterVersion: buildClusterVersionWithMultipleHistory("4.19.0", "4.18.0", "4.17.0"), + annotationExpected: false, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.17.0"), + }, + { + name: "AWS platform, CI version format should be parsed correctly", + infra: buildInfra(withPlatformType(configv1.AWSPlatformType)), + mcop: buildMachineConfigurationWithBootImageEnabledAndNoSkewEnforcement(), + clusterVersion: buildClusterVersion("4.18.0-0.ci-2024-01-01-000000"), + annotationExpected: false, + expectedManagedBootImagesStatus: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + expectedSkewEnforcementStatus: apihelpers.GetSkewEnforcementStatusAutomaticWithOCPVersion("4.18.0"), }, } + for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { infraIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) @@ -371,15 +463,21 @@ func TestSyncMachineConfiguration(t *testing.T) { mcopIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) mcopIndexer.Add(tc.mcop) mcpIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + clusterVersionIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + if tc.clusterVersion != nil { + clusterVersionIndexer.Add(tc.clusterVersion) + } + optr := &Operator{ eventRecorder: &record.FakeRecorder{}, fgHandler: ctrlcommon.NewFeatureGatesHardcodedHandler( - []configv1.FeatureGateName{features.FeatureGateManagedBootImages, features.FeatureGateManagedBootImagesAWS, features.FeatureGateManagedBootImagesvSphere, features.FeatureGateManagedBootImagesAzure}, []configv1.FeatureGateName{}, + []configv1.FeatureGateName{features.FeatureGateManagedBootImages, features.FeatureGateManagedBootImagesAWS, features.FeatureGateManagedBootImagesvSphere, features.FeatureGateManagedBootImagesAzure, features.FeatureGateBootImageSkewEnforcement}, []configv1.FeatureGateName{}, ), - infraLister: configlistersv1.NewInfrastructureLister(infraIndexer), - mcopLister: mcoplistersv1.NewMachineConfigurationLister(mcopIndexer), - mcopClient: fakemcopclientset.NewSimpleClientset(tc.mcop), - mcpLister: mcplister.NewMachineConfigPoolLister(mcpIndexer), + infraLister: configlistersv1.NewInfrastructureLister(infraIndexer), + mcopLister: mcoplistersv1.NewMachineConfigurationLister(mcopIndexer), + mcopClient: fakemcopclientset.NewSimpleClientset(tc.mcop), + mcpLister: mcplister.NewMachineConfigPoolLister(mcpIndexer), + clusterVersionLister: configlistersv1.NewClusterVersionLister(clusterVersionIndexer), } err := optr.syncMachineConfiguration(nil, nil) assert.NoError(t, err) @@ -388,6 +486,8 @@ func TestSyncMachineConfiguration(t *testing.T) { // Ensure ManagedBootImagesStatus and annotations are as expected assert.Equal(t, tc.expectedManagedBootImagesStatus, mcop.Status.ManagedBootImagesStatus) assert.Equal(t, tc.annotationExpected, metav1.HasAnnotation(mcop.ObjectMeta, ctrlcommon.BootImageOptedInAnnotation)) + // Ensure BootImageSkewEnforcementStatus is as expected + assert.Equal(t, tc.expectedSkewEnforcementStatus, mcop.Status.BootImageSkewEnforcementStatus) }) } } @@ -403,3 +503,89 @@ func buildMachineConfigurationWithBootImageUpdateEnabled() *opv1.MachineConfigur func buildMachineConfigurationWithNoBootImageConfiguration() *opv1.MachineConfiguration { return &opv1.MachineConfiguration{Spec: opv1.MachineConfigurationSpec{ManagedBootImages: apihelpers.GetManagedBootImagesWithNoConfiguration()}, ObjectMeta: metav1.ObjectMeta{Name: "cluster"}} } + +// Helper functions for building ClusterVersion objects +func buildClusterVersion(version string) *configv1.ClusterVersion { + return &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{ + Name: "version", + }, + Status: configv1.ClusterVersionStatus{ + History: []configv1.UpdateHistory{ + { + State: configv1.CompletedUpdate, + Version: version, + }, + }, + }, + } +} + +func buildClusterVersionWithMultipleHistory(versions ...string) *configv1.ClusterVersion { + history := make([]configv1.UpdateHistory, len(versions)) + for i, v := range versions { + state := configv1.CompletedUpdate + if i == 0 { + state = configv1.PartialUpdate // Most recent is partial (in progress) + } + history[i] = configv1.UpdateHistory{ + State: state, + Version: v, + } + } + return &configv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{ + Name: "version", + }, + Status: configv1.ClusterVersionStatus{ + History: history, + }, + } +} + +// Helper functions for building MachineConfiguration with skew enforcement +func buildMachineConfigurationWithSkewEnforcementManual(ocpVersion string) *opv1.MachineConfiguration { + return &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Spec: opv1.MachineConfigurationSpec{ + ManagedBootImages: apihelpers.GetManagedBootImagesWithNoConfiguration(), + BootImageSkewEnforcement: opv1.BootImageSkewEnforcementConfig{ + Mode: opv1.BootImageSkewEnforcementConfigModeManual, + Manual: opv1.ClusterBootImageManual{ + Mode: opv1.ClusterBootImageSpecModeOCPVersion, + OCPVersion: ocpVersion, + }, + }, + }, + } +} + +func buildMachineConfigurationWithSkewEnforcementNone() *opv1.MachineConfiguration { + return &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Spec: opv1.MachineConfigurationSpec{ + ManagedBootImages: apihelpers.GetManagedBootImagesWithNoConfiguration(), + BootImageSkewEnforcement: opv1.BootImageSkewEnforcementConfig{ + Mode: opv1.BootImageSkewEnforcementConfigModeNone, + }, + }, + } +} + +func buildMachineConfigurationWithBootImageEnabledAndNoSkewEnforcement() *opv1.MachineConfiguration { + return &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Spec: opv1.MachineConfigurationSpec{ + ManagedBootImages: apihelpers.GetManagedBootImagesWithUpdateEnabled(), + }, + } +} + +func buildMachineConfigurationWithBootImageDisabledAndNoSkewEnforcement() *opv1.MachineConfiguration { + return &opv1.MachineConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Spec: opv1.MachineConfigurationSpec{ + ManagedBootImages: apihelpers.GetManagedBootImagesWithUpdateDisabled(), + }, + } +}