Skip to content

Commit 87ba6f4

Browse files
refactor: clean up partition e2e tests
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 001b79e commit 87ba6f4

1 file changed

Lines changed: 31 additions & 77 deletions

File tree

test/e2e/controller-manager/model_serving_test.go

Lines changed: 31 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,7 @@ func waitForPodsByLabel(
957957
return false
958958
}
959959
return verify(pods.Items)
960-
}, timeout, 5*time.Second, failureMsg)
960+
}, timeout, 1*time.Second, failureMsg)
961961
}
962962

963963
// createRole is a helper function to create a Role with specified replicas and workers
@@ -1391,13 +1391,11 @@ func TestModelServingPartitionBoundaryProtection(t *testing.T) {
13911391

13921392
// Verify Status.CurrentRevision and Status.UpdateRevision.
13931393
t.Log("Verifying Status.CurrentRevision and Status.UpdateRevision after partitioned update")
1394-
var finalMS *workload.ModelServing
13951394
require.Eventually(t, func() bool {
13961395
ms, err := kthenaClient.WorkloadV1alpha1().ModelServings(testNamespace).Get(ctx, modelServing.Name, metav1.GetOptions{})
13971396
if err != nil {
13981397
return false
13991398
}
1400-
finalMS = ms
14011399

14021400
t.Logf("CurrentRevision: %s, UpdateRevision: %s, UpdatedReplicas: %d",
14031401
ms.Status.CurrentRevision, ms.Status.UpdateRevision, ms.Status.UpdatedReplicas)
@@ -1414,51 +1412,11 @@ func TestModelServingPartitionBoundaryProtection(t *testing.T) {
14141412
return ms.Status.UpdatedReplicas == (replicas - partition)
14151413
}, 3*time.Minute, 5*time.Second, "Revision status fields incorrect after partitioned update")
14161414

1417-
assert.Equal(t, initialCurrentRevision, finalMS.Status.CurrentRevision,
1418-
"CurrentRevision should remain the initial revision")
1419-
assert.NotEqual(t, finalMS.Status.CurrentRevision, finalMS.Status.UpdateRevision,
1420-
"CurrentRevision and UpdateRevision should differ during partitioned update")
1421-
14221415
// Verify per-ordinal images (verifyPartitionState compares container images only; pod revision labels are no longer checked here).
14231416
t.Log("Verifying per-ordinal revisions and images")
14241417
labelSelector := modelServingLabelSelector(modelServing.Name)
14251418
waitForPodsByLabel(t, ctx, kubeClient, labelSelector, 3*time.Minute, func(pods []corev1.Pod) bool {
1426-
protectedCorrect := 0
1427-
updatedCorrect := 0
1428-
1429-
for _, pod := range pods {
1430-
if pod.DeletionTimestamp != nil || pod.Status.Phase != corev1.PodRunning {
1431-
continue
1432-
}
1433-
1434-
groupName := pod.Labels["modelserving.volcano.sh/group-name"]
1435-
_, ordinal := getGroupOrdinal(groupName)
1436-
if ordinal < 0 {
1437-
continue
1438-
}
1439-
1440-
podRevision := pod.Labels["modelserving.volcano.sh/revision"]
1441-
containerImage := getPodContainerImage(pod, "test-container")
1442-
1443-
if ordinal < int(partition) {
1444-
// Protected ordinals: revision = CurrentRevision, image = old
1445-
if podRevision == finalMS.Status.CurrentRevision && containerImage == nginxImage {
1446-
protectedCorrect++
1447-
} else {
1448-
t.Logf("Protected pod %s (ordinal %d): revision=%s (want %s), image=%s (want %s)",
1449-
pod.Name, ordinal, podRevision, finalMS.Status.CurrentRevision, containerImage, nginxImage)
1450-
}
1451-
} else {
1452-
// Updated ordinals: revision = UpdateRevision, image = new
1453-
if podRevision == finalMS.Status.UpdateRevision && containerImage == "nginx:alpine" {
1454-
updatedCorrect++
1455-
} else {
1456-
t.Logf("Updated pod %s (ordinal %d): revision=%s (want %s), image=%s (want nginx:alpine)",
1457-
pod.Name, ordinal, podRevision, finalMS.Status.UpdateRevision, containerImage)
1458-
}
1459-
}
1460-
}
1461-
1419+
protectedCorrect, updatedCorrect := verifyPartitionState(t, pods, int(partition), nginxImage, "nginx:alpine")
14621420
t.Logf("Protected correct: %d/3, Updated correct: %d/2", protectedCorrect, updatedCorrect)
14631421
return protectedCorrect == 3 && updatedCorrect == 2
14641422
}, "Per-ordinal revision/image verification failed")
@@ -1511,20 +1469,7 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
15111469
// Verify the partitioned state is established: R-0,R-1,R-2 have old image, R-3,R-4 have new
15121470
labelSelector := modelServingLabelSelector(modelServing.Name)
15131471
waitForPodsByLabel(t, ctx, kubeClient, labelSelector, 3*time.Minute, func(pods []corev1.Pod) bool {
1514-
protectedOld, updatedNew := 0, 0
1515-
for _, pod := range pods {
1516-
if pod.DeletionTimestamp != nil {
1517-
continue
1518-
}
1519-
groupName := pod.Labels["modelserving.volcano.sh/group-name"]
1520-
_, ordinal := getGroupOrdinal(groupName)
1521-
image := getPodContainerImage(pod, "test-container")
1522-
if ordinal < int(partition) && image == nginxImage {
1523-
protectedOld++
1524-
} else if ordinal >= int(partition) && image == "nginx:alpine" {
1525-
updatedNew++
1526-
}
1527-
}
1472+
protectedOld, updatedNew := verifyPartitionState(t, pods, int(partition), nginxImage, "nginx:alpine")
15281473
return protectedOld == 3 && updatedNew == 2
15291474
}, "Failed to reach partitioned state")
15301475

@@ -1614,30 +1559,17 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
16141559
// Also verify the overall state is still correct: 3 protected + 2 updated
16151560
t.Log("Verifying overall partition state is preserved after pod recreation")
16161561
waitForPodsByLabel(t, ctx, kubeClient, labelSelector, 3*time.Minute, func(pods []corev1.Pod) bool {
1617-
protectedOld, updatedNew := 0, 0
1618-
for _, pod := range pods {
1619-
if pod.DeletionTimestamp != nil || pod.Status.Phase != corev1.PodRunning {
1620-
continue
1621-
}
1622-
groupName := pod.Labels["modelserving.volcano.sh/group-name"]
1623-
_, ordinal := getGroupOrdinal(groupName)
1624-
image := getPodContainerImage(pod, "test-container")
1625-
if ordinal >= 0 && ordinal < int(partition) && image == nginxImage {
1626-
protectedOld++
1627-
} else if ordinal >= int(partition) && image == "nginx:alpine" {
1628-
updatedNew++
1629-
}
1630-
}
1562+
protectedOld, updatedNew := verifyPartitionState(t, pods, int(partition), nginxImage, "nginx:alpine")
16311563
t.Logf("Protected with old image: %d/3, Updated with new image: %d/2", protectedOld, updatedNew)
16321564
return protectedOld == 3 && updatedNew == 2
16331565
}, "Overall partition state broken after pod recreation")
16341566

16351567
t.Log("ModelServing partition deleted group historical revision test passed successfully")
16361568
}
16371569

1638-
// TestModelServingNoPartitionRollingUpdate verifies default rolling-update behavior
1570+
// TestModelServingRollingUpdate verifies default rolling-update behavior
16391571
// when partition is nil: all replicas move to the new revision and image.
1640-
func TestModelServingNoPartitionRollingUpdate(t *testing.T) {
1572+
func TestModelServingRollingUpdate(t *testing.T) {
16411573
ctx, kthenaClient, kubeClient := setupControllerManagerE2ETest(t)
16421574

16431575
// Create a ModelServing with 4 replicas and NO partition (default behavior)
@@ -1738,7 +1670,7 @@ func TestModelServingNoPartitionRollingUpdate(t *testing.T) {
17381670
assert.Equal(t, replicas, finalMS.Status.UpdatedReplicas,
17391671
"All replicas should be updated when no partition is set")
17401672

1741-
t.Log("ModelServing no-partition rolling update test passed successfully")
1673+
t.Log("ModelServing rolling update test passed successfully")
17421674
}
17431675

17441676
// getGroupOrdinal extracts the ordinal from a ServingGroup name (e.g., "test-ms-3" -> 3).
@@ -1758,6 +1690,30 @@ func getGroupOrdinal(groupName string) (string, int) {
17581690
return parent, ordinal
17591691
}
17601692

1693+
// verifyPartitionState counts pods whose image, as reported by getPodContainerImage for "test-container", matches what their group ordinal requires: oldImage when ordinal < partition, newImage when ordinal >= partition.
1694+
// Pods with a DeletionTimestamp, pods not in the Running phase, and pods whose group-name label yields a negative ordinal are skipped; revision labels are not inspected. Returns (protectedCount, updatedCount).
1695+
func verifyPartitionState(t *testing.T, pods []corev1.Pod, partition int, oldImage, newImage string) (int, int) {
1696+
t.Helper()
1697+
protectedCorrect, updatedCorrect := 0, 0
1698+
for _, pod := range pods {
1699+
if pod.DeletionTimestamp != nil || pod.Status.Phase != corev1.PodRunning {
1700+
continue
1701+
}
1702+
groupName := pod.Labels["modelserving.volcano.sh/group-name"]
1703+
_, ordinal := getGroupOrdinal(groupName)
1704+
if ordinal < 0 {
1705+
continue
1706+
}
1707+
image := getPodContainerImage(pod, "test-container")
1708+
if ordinal < partition && image == oldImage {
1709+
protectedCorrect++
1710+
} else if ordinal >= partition && image == newImage {
1711+
updatedCorrect++
1712+
}
1713+
}
1714+
return protectedCorrect, updatedCorrect
1715+
}
1716+
17611717
func createRollingUpdateModelServing(name string, servingGroupReplicas int32, partition *int32) *workload.ModelServing {
17621718
roleReplicas := int32(1)
17631719
rollingUpdateConfig := &workload.RollingUpdateConfiguration{
@@ -1835,8 +1791,6 @@ func TestModelServingControllerManagerRestart(t *testing.T) {
18351791
_ = kthenaClient.WorkloadV1alpha1().ModelServings(testNamespace).Delete(cleanupCtx, modelServing.Name, metav1.DeleteOptions{})
18361792
})
18371793

1838-
// ModelServing Partition Revision Control
1839-
18401794
// Wait briefly for initial reconciliation to start
18411795
t.Log("Waiting for initial reconciliation to start...")
18421796
// Wait for a random duration between 0 and 3 seconds (in 100ms increments)

0 commit comments

Comments
 (0)