Skip to content

Commit 90af2d0

Browse files
Backport: fix(vm): fix some volume migration bugs (#1505)
fix(vm): fix some volume migration bugs (#1498) This PR fixes several critical bugs related to volume migration in VM operations: 1. Fixed Migrating Condition and Generation Sync Problem: Incorrect handling of the Migrating condition and lack of proper synchronization. Solution: Added proper synchronization logic and correct setting of observedGeneration to ensure state consistency. 2. Blocked RWO Volume Migration with Immediate Failure Problem: Attempting to migrate VM with RWO (ReadWriteOnce) volumes was causing undefined behavior. Solution: VMOP now immediately fails the migration operation when RWO volumes are detected, as this configuration is not supported. Added e2e test. 3. Fixed Pending State Hang and Volume Sync on KVVM Problem: VM operations could hang indefinitely in Pending state due to volume synchronization issues on KVVM. Solution: Implemented proper volume synchronization logic to prevent hanging and ensure smooth state transitions. 4. Prevented Unplanned Restarts from Unexpected State Problem: Presence of unexpected state could trigger unplanned VM restarts. Solution: Added validation and handling logic to prevent restarts caused by unexpected state conditions. Signed-off-by: Yaroslav Borbat <[email protected]> Co-authored-by: Yaroslav Borbat <[email protected]>
1 parent a67d050 commit 90af2d0

File tree

15 files changed

+333
-238
lines changed

15 files changed

+333
-238
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
Copyright 2025 Flant JSC
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package vmbda
18+
19+
import (
20+
"github.com/deckhouse/virtualization-controller/pkg/builder/meta"
21+
"github.com/deckhouse/virtualization/api/core/v1alpha2"
22+
)
23+
24+
type Option func(vd *v1alpha2.VirtualMachineBlockDeviceAttachment)
25+
26+
var (
27+
WithName = meta.WithName[*v1alpha2.VirtualMachineBlockDeviceAttachment]
28+
WithNamespace = meta.WithNamespace[*v1alpha2.VirtualMachineBlockDeviceAttachment]
29+
WithGenerateName = meta.WithGenerateName[*v1alpha2.VirtualMachineBlockDeviceAttachment]
30+
WithLabel = meta.WithLabel[*v1alpha2.VirtualMachineBlockDeviceAttachment]
31+
WithLabels = meta.WithLabels[*v1alpha2.VirtualMachineBlockDeviceAttachment]
32+
WithAnnotation = meta.WithAnnotation[*v1alpha2.VirtualMachineBlockDeviceAttachment]
33+
WithAnnotations = meta.WithAnnotations[*v1alpha2.VirtualMachineBlockDeviceAttachment]
34+
WithFinalizer = meta.WithFinalizer[*v1alpha2.VirtualMachineBlockDeviceAttachment]
35+
)
36+
37+
func WithVirtualMachineName(name string) func(vmbda *v1alpha2.VirtualMachineBlockDeviceAttachment) {
38+
return func(vmbda *v1alpha2.VirtualMachineBlockDeviceAttachment) {
39+
vmbda.Spec.VirtualMachineName = name
40+
}
41+
}
42+
43+
func WithBlockDeviceRef(kind v1alpha2.VMBDAObjectRefKind, name string) func(vmbda *v1alpha2.VirtualMachineBlockDeviceAttachment) {
44+
return func(vmbda *v1alpha2.VirtualMachineBlockDeviceAttachment) {
45+
vmbda.Spec.BlockDeviceRef = v1alpha2.VMBDAObjectRef{
46+
Kind: kind,
47+
Name: name,
48+
}
49+
}
50+
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
Copyright 2025 Flant JSC
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package vmbda
18+
19+
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
22+
"github.com/deckhouse/virtualization/api/core/v1alpha2"
23+
)
24+
25+
func New(options ...Option) *v1alpha2.VirtualMachineBlockDeviceAttachment {
26+
vmbda := NewEmpty("", "")
27+
ApplyOptions(vmbda, options...)
28+
return vmbda
29+
}
30+
31+
func ApplyOptions(vmbda *v1alpha2.VirtualMachineBlockDeviceAttachment, opts ...Option) {
32+
if vmbda == nil {
33+
return
34+
}
35+
for _, opt := range opts {
36+
opt(vmbda)
37+
}
38+
}
39+
40+
func NewEmpty(name, namespace string) *v1alpha2.VirtualMachineBlockDeviceAttachment {
41+
return &v1alpha2.VirtualMachineBlockDeviceAttachment{
42+
TypeMeta: metav1.TypeMeta{
43+
APIVersion: v1alpha2.SchemeGroupVersion.String(),
44+
Kind: v1alpha2.VirtualMachineBlockDeviceAttachmentKind,
45+
},
46+
ObjectMeta: metav1.ObjectMeta{
47+
Name: name,
48+
Namespace: namespace,
49+
},
50+
}
51+
}

images/virtualization-artifact/pkg/common/vmop/vmop.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ limitations under the License.
1717
package vmop
1818

1919
import (
20+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
21+
22+
"github.com/deckhouse/virtualization-controller/pkg/controller/conditions"
2023
"github.com/deckhouse/virtualization/api/core/v1alpha2"
24+
"github.com/deckhouse/virtualization/api/core/v1alpha2/vmopcondition"
2125
)
2226

2327
func IsInProgressOrPending(vmop *v1alpha2.VirtualMachineOperation) bool {
@@ -44,3 +48,8 @@ func InProgressOrPendingExists(vmops []v1alpha2.VirtualMachineOperation) bool {
4448
}
4549
return false
4650
}
51+
52+
func IsOperationInProgress(vmop *v1alpha2.VirtualMachineOperation) bool {
53+
sent, _ := conditions.GetCondition(vmopcondition.TypeSignalSent, vmop.Status.Conditions)
54+
return sent.Status == metav1.ConditionTrue && !IsFinished(vmop)
55+
}

images/virtualization-artifact/pkg/controller/kvbuilder/kvvm_utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ func ApplyVirtualMachineSpec(
259259

260260
func ApplyMigrationVolumes(kvvm *KVVM, vm *virtv2.VirtualMachine, vdsByName map[string]*virtv2.VirtualDisk) error {
261261
bootOrder := uint(1)
262-
var updateVolumesStrategy *virtv1.UpdateVolumesStrategy
262+
var updateVolumesStrategy *virtv1.UpdateVolumesStrategy = nil
263263

264264
for _, bd := range vm.Spec.BlockDeviceRefs {
265265
if bd.Kind != virtv2.DiskDevice {

images/virtualization-artifact/pkg/controller/vd/internal/life_cycle.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package internal
1919
import (
2020
"context"
2121
"fmt"
22-
"time"
2322

2423
corev1 "k8s.io/api/core/v1"
2524
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -153,10 +152,5 @@ func (h LifeCycleHandler) Handle(ctx context.Context, vd *virtv2.VirtualDisk) (r
153152
return reconcile.Result{}, fmt.Errorf("failed to sync virtual disk data source %s: %w", ds.Name(), err)
154153
}
155154

156-
readyConditionAfterSync, _ := conditions.GetCondition(vdcondition.ReadyType, vd.Status.Conditions)
157-
if readyConditionAfterSync.Status == metav1.ConditionTrue && conditions.IsLastUpdated(readyConditionAfterSync, vd) {
158-
return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
159-
}
160-
161155
return result, nil
162156
}

images/virtualization-artifact/pkg/controller/vd/internal/migration.go

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ func (h MigrationHandler) handleMigratePrepareTarget(ctx context.Context, vd *v1
410410
StartTimestamp: metav1.Now(),
411411
}
412412

413-
cb.Status(metav1.ConditionTrue).
413+
cb.Status(metav1.ConditionFalse).
414414
Reason(vdcondition.MigratingWaitForTargetReadyReason).
415415
Message("Migration started.")
416416
conditions.SetCondition(cb, &vd.Status.Conditions)
@@ -426,19 +426,17 @@ func (h MigrationHandler) handleMigrateSync(ctx context.Context, vd *v1alpha2.Vi
426426

427427
cb := conditions.NewConditionBuilder(vdcondition.MigratingType).
428428
Generation(vd.Generation).
429-
Status(metav1.ConditionTrue).
429+
Status(metav1.ConditionFalse).
430430
Reason(vdcondition.MigratingWaitForTargetReadyReason)
431431

432432
if pvc == nil {
433-
cb.Status(metav1.ConditionFalse).
434-
Reason(vdcondition.MigratingWaitForTargetReadyReason).
435-
Message("Target persistent volume claim is not found.")
433+
cb.Message("Target persistent volume claim is not found.")
436434
conditions.SetCondition(cb, &vd.Status.Conditions)
437435
return nil
438436
}
439437

440438
if pvc.Status.Phase == corev1.ClaimBound {
441-
cb.Reason(vdcondition.InProgress).Message("Target persistent volume claim is bound.")
439+
cb.Status(metav1.ConditionTrue).Reason(vdcondition.InProgress).Message("Target persistent volume claim is bound.")
442440
conditions.SetCondition(cb, &vd.Status.Conditions)
443441
return nil
444442
}
@@ -467,15 +465,13 @@ func (h MigrationHandler) handleMigrateSync(ctx context.Context, vd *v1alpha2.Vi
467465

468466
isWaitForFistConsumer := sc.VolumeBindingMode == nil || *sc.VolumeBindingMode == storev1.VolumeBindingWaitForFirstConsumer
469467
if isWaitForFistConsumer {
470-
cb.Reason(vdcondition.InProgress).Message("Target persistent volume claim is waiting for first consumer.")
468+
cb.Status(metav1.ConditionTrue).Reason(vdcondition.InProgress).Message("Target persistent volume claim is waiting for first consumer.")
471469
conditions.SetCondition(cb, &vd.Status.Conditions)
472470
return nil
473471
}
474472
}
475473

476-
cb.Status(metav1.ConditionFalse).
477-
Reason(vdcondition.MigratingWaitForTargetReadyReason).
478-
Message("Target persistent volume claim is not bound or not waiting for first consumer.")
474+
cb.Message("Target persistent volume claim is not bound or not waiting for first consumer.")
479475
conditions.SetCondition(cb, &vd.Status.Conditions)
480476
return nil
481477
}

images/virtualization-artifact/pkg/controller/vm/internal/migrating.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ limitations under the License.
1717
package internal
1818

1919
import (
20+
"cmp"
2021
"context"
2122
"fmt"
23+
"slices"
2224
"strings"
2325

2426
corev1 "k8s.io/api/core/v1"
@@ -285,21 +287,30 @@ func (h *MigratingHandler) getVMOPCandidate(ctx context.Context, s state.Virtual
285287
return nil, err
286288
}
287289

288-
var candidate *virtv2.VirtualMachineOperation
289-
if len(vmops) > 0 {
290-
candidate = vmops[0]
290+
if len(vmops) == 0 {
291+
return nil, nil
292+
}
291293

292-
for _, vmop := range vmops {
293-
if !commonvmop.IsMigration(vmop) {
294-
continue
295-
}
296-
if vmop.GetCreationTimestamp().Time.After(candidate.GetCreationTimestamp().Time) {
297-
candidate = vmop
298-
}
294+
// sort vmops from the oldest to the newest
295+
slices.SortFunc(vmops, func(a, b *virtv2.VirtualMachineOperation) int {
296+
return cmp.Compare(a.GetCreationTimestamp().UnixNano(), b.GetCreationTimestamp().UnixNano())
297+
})
298+
299+
migrations := slices.DeleteFunc(vmops, func(vmop *virtv2.VirtualMachineOperation) bool {
300+
return !commonvmop.IsMigration(vmop)
301+
})
302+
303+
for _, migration := range migrations {
304+
if commonvmop.IsInProgressOrPending(migration) {
305+
return migration, nil
299306
}
300307
}
301308

302-
return candidate, nil
309+
if len(migrations) > 0 {
310+
return migrations[len(migrations)-1], nil
311+
}
312+
313+
return nil, nil
303314
}
304315

305316
func (h *MigratingHandler) syncMigratable(ctx context.Context, s state.VirtualMachineState, vm *virtv2.VirtualMachine, kvvm *virtv1.VirtualMachine) error {

0 commit comments

Comments
 (0)