Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/machine-config-operator/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func runStartCmd(_ *cobra.Command, _ []string) {
ctrlctx.KubeInformerFactory.Core().V1().Nodes(),
ctrlctx.KubeMAOSharedInformer.Core().V1().Secrets(),
ctrlctx.ConfigInformerFactory.Config().V1().Images(),
ctrlctx.ConfigInformerFactory.Config().V1().ClusterVersions(),
ctrlctx.KubeNamespacedInformerFactory.Core().V1().ServiceAccounts(),
ctrlctx.KubeNamespacedInformerFactory.Core().V1().Secrets(),
ctrlctx.OpenShiftConfigKubeNamespacedInformerFactory.Core().V1().ConfigMaps(),
Expand Down
28 changes: 28 additions & 0 deletions pkg/apihelpers/apihelpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,3 +501,31 @@ func GetManagedBootImagesWithUpdateDisabled() opv1.ManagedBootImages {
func GetManagedBootImagesWithNoConfiguration() opv1.ManagedBootImages {
	// The zero value is the "no configuration" case: no field is populated.
	var empty opv1.ManagedBootImages
	return empty
}

// GetSkewEnforcementStatusAutomaticWithOCPVersion builds a BootImageSkewEnforcementStatus
// whose Mode is Automatic and whose Automatic record carries ocpVersion as its OCP version.
// All other fields are left at their zero values.
func GetSkewEnforcementStatusAutomaticWithOCPVersion(ocpVersion string) opv1.BootImageSkewEnforcementStatus {
	var status opv1.BootImageSkewEnforcementStatus
	status.Mode = opv1.BootImageSkewEnforcementModeStatusAutomatic
	status.Automatic = opv1.ClusterBootImageAutomatic{OCPVersion: ocpVersion}
	return status
}

// GetSkewEnforcementStatusManualWithOCPVersion builds a BootImageSkewEnforcementStatus
// whose Mode is Manual. Its Manual record is set to the OCPVersion mode and carries
// ocpVersion as its OCP version. All other fields are left at their zero values.
func GetSkewEnforcementStatusManualWithOCPVersion(ocpVersion string) opv1.BootImageSkewEnforcementStatus {
	manualRecord := opv1.ClusterBootImageManual{
		Mode:       opv1.ClusterBootImageSpecModeOCPVersion,
		OCPVersion: ocpVersion,
	}

	var status opv1.BootImageSkewEnforcementStatus
	status.Mode = opv1.BootImageSkewEnforcementModeStatusManual
	status.Manual = manualRecord
	return status
}

// GetSkewEnforcementStatusNone builds a BootImageSkewEnforcementStatus whose Mode is None.
// No other field is populated.
func GetSkewEnforcementStatusNone() opv1.BootImageSkewEnforcementStatus {
	var status opv1.BootImageSkewEnforcementStatus
	status.Mode = opv1.BootImageSkewEnforcementModeStatusNone
	return status
}
4 changes: 4 additions & 0 deletions pkg/controller/common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ const (

// NodeSizingEnabledEnvPath is the file path for the node sizing enabled environment file
NodeSizingEnabledEnvPath = "/etc/node-sizing-enabled.env"
// Current boot image skew limits.
// Note: update the unit tests in status_test.go whenever the following values are bumped.
RHCOSVersionBootImageSkewLimit = "9.2"
OCPVersionBootImageSkewLimit = "4.13.0"
)

// Commonly-used MCO ConfigMap names
Expand Down
4 changes: 2 additions & 2 deletions pkg/controller/machine-set-boot-image/cpms_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func reconcilePlatformCPMS[T any](
configMap *corev1.ConfigMap,
arch string,
secretClient clientset.Interface,
reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, *T, error),
reconcileProviderSpec func(*stream.Stream, string, *osconfigv1.Infrastructure, *T, string, clientset.Interface) (bool, bool, *T, error),
) (patchRequired bool, newCPMS *machinev1.ControlPlaneMachineSet, err error) {
klog.Infof("Reconciling controlplanemachineset %s on %s, with arch %s", cpms.Name, string(infra.Status.PlatformStatus.Type), arch)

Expand All @@ -294,7 +294,7 @@ func reconcilePlatformCPMS[T any](
}

// Reconcile the provider spec
patchRequired, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, cpms.Name, secretClient)
patchRequired, _, newProviderSpec, err := reconcileProviderSpec(streamData, arch, infra, providerSpec, cpms.Name, secretClient)
if err != nil {
return false, nil, err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
k8sversion "k8s.io/apimachinery/pkg/util/version"
coreinformersv1 "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
Expand Down Expand Up @@ -72,6 +73,7 @@ type Controller struct {
// MachineResourceStats is a stats structure for local bookkeeping of machine resources.
// The counters are plain ints with no synchronization of their own.
type MachineResourceStats struct {
// inProgress counts resources whose sync returned no error
// (presumably what ctrl.mapiStats tracks in the MAPI machineset loop — TODO confirm the field's type).
inProgress int
// skippedCount counts resources whose sync reported that patching was skipped.
skippedCount int
// erroredCount counts resources whose sync returned an error.
erroredCount int
// totalCount is presumably the number of resources considered by a sync pass;
// it is not written in the code visible here — TODO confirm.
totalCount int
}
Expand Down Expand Up @@ -379,8 +381,10 @@ func (ctrl *Controller) updateMachineConfiguration(oldMC, newMC interface{}) {
return
}

// Only take action if the there is an actual change in the MachineConfiguration's ManagedBootImagesStatus
if reflect.DeepEqual(oldMachineConfiguration.Status.ManagedBootImagesStatus, newMachineConfiguration.Status.ManagedBootImagesStatus) {
// Only take action if there is an actual change in the MachineConfiguration's ManagedBootImagesStatus or, when the BootImageSkewEnforcement feature gate is enabled, in its BootImageSkewEnforcementStatus
if reflect.DeepEqual(oldMachineConfiguration.Status.ManagedBootImagesStatus, newMachineConfiguration.Status.ManagedBootImagesStatus) &&
(!ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) ||
reflect.DeepEqual(oldMachineConfiguration.Status.BootImageSkewEnforcementStatus, newMachineConfiguration.Status.BootImageSkewEnforcementStatus)) {
return
}

Expand Down Expand Up @@ -457,33 +461,78 @@ func (ctrl *Controller) updateConditions(newReason string, syncError error, targ
}
// Only make an API call if there is an update to the Conditions field
if !reflect.DeepEqual(newConditions, mcop.Status.Conditions) {
ctrl.updateMachineConfigurationStatus(mcop, newConditions)
mcop.Status.Conditions = newConditions
ctrl.updateMachineConfigurationStatus(mcop.Status)
}
}

// updateMachineConfigurationStatus updates the MachineConfiguration status with new conditions
// using retry logic to handle concurrent updates.
func (ctrl *Controller) updateMachineConfigurationStatus(mcop *opv1.MachineConfiguration, newConditions []metav1.Condition) {
// updateClusterBootImage records the OCP version of the last boot image update in the
// MachineConfiguration's BootImageSkewEnforcementStatus. It only acts when that status is
// in Automatic mode; in any other mode it returns without changing anything.
//
// The version is read from the boot images configmap, reduced to "major.minor.patch" and
// stored in the Automatic record. A status update is issued only when the resulting status
// differs from the one currently on the object.
//
// The function returns nothing: every failure (fetching the MachineConfiguration, fetching
// the configmap, a missing or unparsable release version) is logged and the update is skipped.
func (ctrl *Controller) updateClusterBootImage() {
	// conditionMutex is held for the whole read-compare-update sequence below.
	ctrl.conditionMutex.Lock()
	defer ctrl.conditionMutex.Unlock()

	mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{})
	if err != nil {
		klog.Errorf("error updating cluster boot image record: %s", err)
		return
	}
	// No action to take if not in automatic mode
	if mcop.Status.BootImageSkewEnforcementStatus.Mode != opv1.BootImageSkewEnforcementModeStatusAutomatic {
		return
	}

	// Get OCP version of last boot image update from configmap
	configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName)
	if err != nil {
		klog.Warningf("Failed to get boot images configmap: %v, skipping cluster boot image record update", err)
		return
	}

	releaseVersion, found := configMap.Data[ctrlcommon.OCPReleaseVersionKey]
	if !found {
		klog.Warningf("OCP release version not found in boot images configmap, skipping cluster boot image record update")
		return
	}

	// Parse and extract semantic version (major.minor.patch) for API validation
	parsedVersion, err := k8sversion.ParseGeneric(releaseVersion)
	if err != nil {
		klog.Warningf("Failed to parse release version %q from configmap: %v, skipping cluster boot image record update", releaseVersion, err)
		return
	}
	ocpVersion := fmt.Sprintf("%d.%d.%d", parsedVersion.Major(), parsedVersion.Minor(), parsedVersion.Patch())

	// DeepCopy returns a pointer; work on the copy so the fetched status stays intact
	// for the comparison below.
	newBootImageSkewEnforcementStatus := mcop.Status.BootImageSkewEnforcementStatus.DeepCopy()
	newBootImageSkewEnforcementStatus.Automatic = opv1.ClusterBootImageAutomatic{
		OCPVersion: ocpVersion,
	}

	// Only make an API call if there is an update to the skew enforcement status.
	// The copy must be dereferenced here: reflect.DeepEqual never reports a struct value
	// and a pointer to a struct as equal, which would make this guard always fire.
	if reflect.DeepEqual(mcop.Status.BootImageSkewEnforcementStatus, *newBootImageSkewEnforcementStatus) {
		return
	}
	mcop.Status.BootImageSkewEnforcementStatus = *newBootImageSkewEnforcementStatus
	ctrl.updateMachineConfigurationStatus(mcop.Status)
}

// updateMachineConfigurationStatus updates the MachineConfiguration status using retry logic to handle concurrent updates.
func (ctrl *Controller) updateMachineConfigurationStatus(mcopStatus opv1.MachineConfigurationStatus) {
// Using a retry here as there may be concurrent reconciliation loops updating conditions for multiple
// resources at the same time and their local stores may be out of date
if !reflect.DeepEqual(mcop.Status.Conditions, newConditions) {
klog.V(4).Infof("%v", newConditions)
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{})
if err != nil {
return err
}
mcop.Status.Conditions = newConditions
_, err = ctrl.mcopClient.OperatorV1().MachineConfigurations().UpdateStatus(context.TODO(), mcop, metav1.UpdateOptions{})
if err != nil {
return err
}
return nil
}); err != nil {
klog.Errorf("error updating MachineConfiguration status: %v", err)
klog.V(2).Infof("MachineConfiguration status update: %v", mcopStatus)
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
mcop, err := ctrl.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{})
if err != nil {
return err
}
mcop.Status = mcopStatus
_, err = ctrl.mcopClient.OperatorV1().MachineConfigurations().UpdateStatus(context.TODO(), mcop, metav1.UpdateOptions{})
if err != nil {
return err
}
return nil
}); err != nil {
klog.Errorf("error updating MachineConfiguration status: %v", err)
}

}

// getDefaultConditions returns the default boot image update conditions when no
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,7 @@ func TestReconcileAzureProviderSpec(t *testing.T) {
testStreamData = tt.streamData
}

patchRequired, updatedProviderSpec, err := reconcileAzureProviderSpec(
patchRequired, _, updatedProviderSpec, err := reconcileAzureProviderSpec(
testStreamData,
tt.arch,
infra,
Expand Down
42 changes: 26 additions & 16 deletions pkg/controller/machine-set-boot-image/ms_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"
"time"

"github.com/openshift/api/features"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
opv1 "github.com/openshift/api/operator/v1"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
Expand Down Expand Up @@ -95,23 +96,32 @@ func (ctrl *Controller) syncMAPIMachineSets(reason string) {
ctrl.updateConditions(reason, nil, opv1.MachineConfigurationBootImageUpdateProgressing)

for _, machineSet := range mapiMachineSets {
err := ctrl.syncMAPIMachineSet(machineSet)
patchSkipped, err := ctrl.syncMAPIMachineSet(machineSet)
if err == nil {
ctrl.mapiStats.inProgress++
} else {
klog.Errorf("Error syncing MAPI MachineSet %v", err)
syncErrors = append(syncErrors, fmt.Errorf("error syncing MAPI MachineSet %s: %v", machineSet.Name, err))
ctrl.mapiStats.erroredCount++
}
if patchSkipped {
ctrl.mapiStats.skippedCount++
}
// Update progressing conditions every step of the loop
ctrl.updateConditions(reason, nil, opv1.MachineConfigurationBootImageUpdateProgressing)
}
// Update/Clear degrade conditions based on errors from this loop
ctrl.updateConditions(reason, kubeErrs.NewAggregate(syncErrors), opv1.MachineConfigurationBootImageUpdateDegraded)
// If no machinesets were skipped, update the cluster boot image record
if ctrl.fgHandler.Enabled(features.FeatureGateBootImageSkewEnforcement) {
if ctrl.mapiStats.skippedCount == 0 {
ctrl.updateClusterBootImage()
}
}
}

// syncMAPIMachineSet will attempt to reconcile the provided machineset
func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet) error {
func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet) (bool, error) {

startTime := time.Now()
klog.V(4).Infof("Started syncing MAPI machineset %q (%v)", machineSet.Name, startTime)
Expand All @@ -123,26 +133,26 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet
// that the machineset may be managed by another workflow and should not be reconciled.
if len(machineSet.GetOwnerReferences()) != 0 {
klog.Infof("machineset %s has OwnerReference: %v, skipping boot image update", machineSet.GetOwnerReferences()[0].Kind+"/"+machineSet.GetOwnerReferences()[0].Name, machineSet.Name)
return nil
return true, nil
}

if os, ok := machineSet.Spec.Template.Labels[OSLabelKey]; ok {
if os == "Windows" {
klog.Infof("machineset %s has a windows os label, skipping boot image update", machineSet.Name)
return nil
return false, nil
}
}

// Fetch the architecture type of this machineset
arch, err := getArchFromMachineSet(machineSet)
if err != nil {
return fmt.Errorf("failed to fetch arch during machineset sync: %w", err)
return false, fmt.Errorf("failed to fetch arch during machineset sync: %w", err)
}

// Fetch the infra object to determine the platform type
infra, err := ctrl.infraLister.Get("cluster")
if err != nil {
return fmt.Errorf("failed to fetch infra object during machineset sync: %w", err)
return false, fmt.Errorf("failed to fetch infra object during machineset sync: %w", err)
}

// Fetch the bootimage configmap & ensure it has been stamped by the operator. This is done by
Expand All @@ -151,44 +161,44 @@ func (ctrl *Controller) syncMAPIMachineSet(machineSet *machinev1beta1.MachineSet
// If it hasn't been updated, exit and wait for a resync.
configMap, err := ctrl.mcoCmLister.ConfigMaps(ctrlcommon.MCONamespace).Get(ctrlcommon.BootImagesConfigMapName)
if err != nil {
return fmt.Errorf("failed to fetch coreos-bootimages config map during machineset sync: %w", err)
return false, fmt.Errorf("failed to fetch coreos-bootimages config map during machineset sync: %w", err)
}
versionHashFromCM, versionHashFound := configMap.Data[ctrlcommon.MCOVersionHashKey]
if !versionHashFound {
klog.Infof("failed to find mco version hash in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName)
return nil
return true, nil
}
if versionHashFromCM != operatorversion.Hash {
klog.Infof("mismatch between MCO hash version stored in configmap and current MCO version; sync will exit to wait for the MCO upgrade to complete")
return nil
return true, nil
}
releaseVersionFromCM, releaseVersionFound := configMap.Data[ctrlcommon.OCPReleaseVersionKey]
if !releaseVersionFound {
klog.Infof("failed to find OCP release version in %s configmap, sync will exit to wait for the MCO upgrade to complete", ctrlcommon.BootImagesConfigMapName)
return nil
return true, nil
}
if releaseVersionFromCM != operatorversion.ReleaseVersion {
klog.Infof("mismatch between OCP release version stored in configmap and current MCO release version; sync will exit to wait for the MCO upgrade to complete")
return nil
return true, nil
}

// Check if this MachineSet requires an update
patchRequired, newMachineSet, err := checkMachineSet(infra, machineSet, configMap, arch, ctrl.kubeClient)
patchRequired, patchSkipped, newMachineSet, err := checkMachineSet(infra, machineSet, configMap, arch, ctrl.kubeClient)
if err != nil {
return fmt.Errorf("failed to reconcile machineset %s, err: %w", machineSet.Name, err)
return false, fmt.Errorf("failed to reconcile machineset %s, err: %w", machineSet.Name, err)
}

// Patch the machineset if required
if patchRequired {
// First, check if we're hot looping
if ctrl.checkMAPIMachineSetHotLoop(newMachineSet) {
return fmt.Errorf("refusing to reconcile machineset %s, hot loop detected. Please opt-out of boot image updates, adjust your machine provisioning workflow to prevent hot loops and opt back in to resume boot image updates", machineSet.Name)
return false, fmt.Errorf("refusing to reconcile machineset %s, hot loop detected. Please opt-out of boot image updates, adjust your machine provisioning workflow to prevent hot loops and opt back in to resume boot image updates", machineSet.Name)
}
klog.Infof("Patching MAPI machineset %s", machineSet.Name)
return ctrl.patchMachineSet(machineSet, newMachineSet)
return false, ctrl.patchMachineSet(machineSet, newMachineSet)
}
klog.Infof("No patching required for MAPI machineset %s", machineSet.Name)
return nil
return patchSkipped, nil
}

// Checks against a local store of boot image updates to detect hot looping
Expand Down
Loading