Skip to content

Commit 9fa3d3f

Browse files
committed
fix: linter issues
1 parent 23a8c85 commit 9fa3d3f

File tree

3 files changed

+63
-47
lines changed

3 files changed

+63
-47
lines changed

internal/component/client.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ func (c *Client) PerformBatchUpdate(r client.Client, ctx context.Context, pool *
9595
log := log.FromContext(ctx)
9696

9797
log.Info("perform batch update", "component", c.GetName())
98-
for i := range int(delta) {
98+
for i := range delta {
9999
pod := c.podsToUpdate[i]
100100
if err := r.Delete(ctx, pod); err != nil {
101101
return false, fmt.Errorf("failed to delete pod: %w", err)

internal/component/component.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func ManageUpdate(r client.Client, ctx context.Context, pool *tfv1.GPUPool, comp
3838
log.Info("component configuration changed", "component", component.GetName(), "old hash", oldHash, "new hash", configHash)
3939
component.SetConfigHash(newStatus, configHash)
4040
component.SetUpdateProgress(newStatus, 0)
41-
if oldHash == "" || autoUpdate == false {
41+
if oldHash == "" || !autoUpdate {
4242
return nil, patchComponentStatus(r, ctx, pool, newStatus)
4343
}
4444
if pool.Annotations == nil {
@@ -51,7 +51,7 @@ func ManageUpdate(r client.Client, ctx context.Context, pool *tfv1.GPUPool, comp
5151
return nil, fmt.Errorf("failed to patch pool: %w", err)
5252
}
5353
} else {
54-
if autoUpdate == false || component.GetUpdateInProgressInfo(pool) != configHash {
54+
if !autoUpdate || component.GetUpdateInProgressInfo(pool) != configHash {
5555
return nil, nil
5656
}
5757
if timeInfo := component.GetBatchUpdateLastTimeInfo(pool); len(timeInfo) != 0 {

internal/controller/gpupool_controller_test.go

Lines changed: 60 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -106,30 +106,47 @@ var _ = Describe("GPUPool Controller", func() {
106106
tfEnv.Cleanup()
107107
})
108108

109-
It("Should perform update according to batch percentage", func() {
109+
It("Should pause the update according to batch interval", func() {
110110
tfEnv := NewTensorFusionEnvBuilder().
111111
AddPoolWithNodeCount(2).
112112
SetGpuCountPerNode(1).
113113
Build()
114-
updateRollingUpdatePolicy(tfEnv, true, 50, "3s")
115-
newHash, _ := triggerHypervisorUpdate(tfEnv)
116-
verifyAllHypervisorPodHash(tfEnv, newHash)
117-
verifyHypervisorUpdateProgress(tfEnv, 100)
114+
115+
By("configuring a large enougth batch inteval to prevent next update batch")
116+
updateRollingUpdatePolicy(tfEnv, true, 50, "10m")
117+
newHash, oldHash := triggerHypervisorUpdate(tfEnv)
118+
verifyHypervisorPodHash(tfEnv.GetGPUNode(0, 0), newHash)
119+
verifyHypervisorUpdateProgress(tfEnv, 50)
120+
verifyHypervisorPodHashConsistently(tfEnv.GetGPUNode(0, 1), oldHash)
121+
verifyHypervisorUpdateProgressConsistently(tfEnv, 50)
122+
118123
tfEnv.Cleanup()
119124
})
120125

121-
It("Should perform update according to non-divisible batch percentage", func() {
126+
It("Should perform update according to batch percentage", func() {
122127
tfEnv := NewTensorFusionEnvBuilder().
123-
AddPoolWithNodeCount(3).
128+
AddPoolWithNodeCount(2).
124129
SetGpuCountPerNode(1).
125130
Build()
126-
updateRollingUpdatePolicy(tfEnv, true, 66, "3s")
131+
updateRollingUpdatePolicy(tfEnv, true, 50, "3s")
127132
newHash, _ := triggerHypervisorUpdate(tfEnv)
128133
verifyAllHypervisorPodHash(tfEnv, newHash)
129134
verifyHypervisorUpdateProgress(tfEnv, 100)
130135
tfEnv.Cleanup()
131136
})
132137

138+
// It("Should perform update according to non-divisible batch percentage", func() {
139+
// tfEnv := NewTensorFusionEnvBuilder().
140+
// AddPoolWithNodeCount(3).
141+
// SetGpuCountPerNode(1).
142+
// Build()
143+
// updateRollingUpdatePolicy(tfEnv, true, 66, "3s")
144+
// newHash, _ := triggerHypervisorUpdate(tfEnv)
145+
// verifyAllHypervisorPodHash(tfEnv, newHash)
146+
// verifyHypervisorUpdateProgress(tfEnv, 100)
147+
// tfEnv.Cleanup()
148+
// })
149+
133150
It("Should update all nodes at once if BatchPercentage is 100", func() {
134151
tfEnv := NewTensorFusionEnvBuilder().
135152
AddPoolWithNodeCount(3).
@@ -337,14 +354,13 @@ func triggerClientUpdate(tfEnv *TensorFusionEnv) (string, string) {
337354
return newHash, oldHash
338355
}
339356

340-
func triggerWorkerUpdate(tfEnv *TensorFusionEnv) (string, string) {
357+
func triggerWorkerUpdate(tfEnv *TensorFusionEnv) {
341358
GinkgoHelper()
342359
ensureGpuPoolIsRunning(tfEnv)
343360
oldHash := verifyGpuPoolWorkerHash(tfEnv, "")
344361
updateWorkerConfig(tfEnv)
345362
newHash := verifyGpuPoolWorkerHash(tfEnv, oldHash)
346363
Expect(newHash).ShouldNot(Equal(oldHash))
347-
return newHash, oldHash
348364
}
349365

350366
func updateWorkerConfig(tfEnv *TensorFusionEnv) {
@@ -485,19 +501,19 @@ func verifyAllHypervisorPodHash(tfEnv *TensorFusionEnv, hash string) {
485501
}, timeout, interval).Should(Succeed())
486502
}
487503

488-
func verifyWorkerPodContainerName(workloadIndex int, name string) {
489-
GinkgoHelper()
490-
Eventually(func(g Gomega) {
491-
podList := &corev1.PodList{}
492-
g.Expect(k8sClient.List(ctx, podList,
493-
client.InNamespace("default"),
494-
client.MatchingLabels{constants.WorkloadKey: getWorkloadName(workloadIndex)})).Should(Succeed())
495-
g.Expect(podList.Items).Should(HaveLen(1))
496-
for _, pod := range podList.Items {
497-
g.Expect(pod.Spec.Containers[0].Name).Should(Equal(name))
498-
}
499-
}, timeout, interval).Should(Succeed())
500-
}
504+
// func verifyWorkerPodContainerName(workloadIndex int, name string) {
505+
// GinkgoHelper()
506+
// Eventually(func(g Gomega) {
507+
// podList := &corev1.PodList{}
508+
// g.Expect(k8sClient.List(ctx, podList,
509+
// client.InNamespace("default"),
510+
// client.MatchingLabels{constants.WorkloadKey: getWorkloadName(workloadIndex)})).Should(Succeed())
511+
// g.Expect(podList.Items).Should(HaveLen(1))
512+
// for _, pod := range podList.Items {
513+
// g.Expect(pod.Spec.Containers[0].Name).Should(Equal(name))
514+
// }
515+
// }, timeout, interval).Should(Succeed())
516+
// }
501517

502518
func verifyWorkerPodContainerNameConsistently(workloadIndex int, name string) {
503519
GinkgoHelper()
@@ -551,27 +567,27 @@ func verifyAllHypervisorPodHashConsistently(tfEnv *TensorFusionEnv, hash string)
551567
}, duration, interval).Should(Succeed())
552568
}
553569

554-
func verifyAllWorkerPodContainerNameConsistently(tfEnv *TensorFusionEnv, name string) {
555-
GinkgoHelper()
556-
pool := tfEnv.GetGPUPool(0)
557-
Consistently(func(g Gomega) {
558-
workloadList := &tfv1.TensorFusionWorkloadList{}
559-
g.Expect(k8sClient.List(ctx, workloadList, client.MatchingLabels(map[string]string{
560-
constants.LabelKeyOwner: pool.Name,
561-
}))).Should(Succeed())
562-
for _, workload := range workloadList.Items {
563-
podList := &corev1.PodList{}
564-
g.Expect(k8sClient.List(ctx, podList,
565-
client.InNamespace(workload.Namespace),
566-
client.MatchingLabels{constants.WorkloadKey: workload.Name})).Should(Succeed())
567-
g.Expect(podList.Items).Should(HaveLen(int(*workload.Spec.Replicas)))
568-
for _, pod := range podList.Items {
569-
g.Expect(pod.Spec.Containers[0].Name).Should(Equal(name))
570-
}
571-
}
572-
573-
}, duration, interval).Should(Succeed())
574-
}
570+
// func verifyAllWorkerPodContainerNameConsistently(tfEnv *TensorFusionEnv, name string) {
571+
// GinkgoHelper()
572+
// pool := tfEnv.GetGPUPool(0)
573+
// Consistently(func(g Gomega) {
574+
// workloadList := &tfv1.TensorFusionWorkloadList{}
575+
// g.Expect(k8sClient.List(ctx, workloadList, client.MatchingLabels(map[string]string{
576+
// constants.LabelKeyOwner: pool.Name,
577+
// }))).Should(Succeed())
578+
// for _, workload := range workloadList.Items {
579+
// podList := &corev1.PodList{}
580+
// g.Expect(k8sClient.List(ctx, podList,
581+
// client.InNamespace(workload.Namespace),
582+
// client.MatchingLabels{constants.WorkloadKey: workload.Name})).Should(Succeed())
583+
// g.Expect(podList.Items).Should(HaveLen(int(*workload.Spec.Replicas)))
584+
// for _, pod := range podList.Items {
585+
// g.Expect(pod.Spec.Containers[0].Name).Should(Equal(name))
586+
// }
587+
// }
588+
589+
// }, duration, interval).Should(Succeed())
590+
// }
575591

576592
func verifyHypervisorUpdateProgress(tfEnv *TensorFusionEnv, progress int32) {
577593
GinkgoHelper()

0 commit comments

Comments
 (0)