Skip to content

Commit 13d5704

Browse files
refactor the e2e test
Signed-off-by: katara-Jayprakash <katarajayprakash@icloud.com>
1 parent c4268c4 commit 13d5704

2 files changed

Lines changed: 80 additions & 6 deletions

File tree

pkg/model-serving-controller/utils/controller_revision_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ func TestGetControllerRevision(t *testing.T) {
107107
assert.Equal(t, "revision-v2", cr.Labels[ControllerRevisionRevisionLabelKey])
108108
}
109109

110-
110+
// TestCleanupOldControllerRevisions_PreservesCurrentAndUpdateRevisions tests that
111+
// CleanupOldControllerRevisions always preserves CurrentRevision and UpdateRevision.
111112
func TestCleanupOldControllerRevisions_PreservesCurrentAndUpdateRevisions(t *testing.T) {
112113
ctx := context.Background()
113114
client := kubefake.NewSimpleClientset()

test/e2e/controller-manager/model_serving_test.go

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,7 +1199,11 @@ func TestModelServingRollingUpdateMaxUnavailableWithBadImage(t *testing.T) {
11991199
t.Log("ModelServing rolling update maxUnavailable with bad image test passed successfully")
12001200
}
12011201

1202-
1202+
// TestLWSAPIBasic tests that kthena can process LWS API correctly by:
1203+
// 1. Creating a simple LWS instance
1204+
// 2. Verifying corresponding ModelServing is created with proper owner references
1205+
// 3. Verifying pods are created automatically
1206+
// 4. Deleting LWS and verifying all resources are cleaned up
12031207
func TestLWSAPIBasic(t *testing.T) {
12041208
ctx, kthenaClient, _ := setupControllerManagerE2ETest(t)
12051209

@@ -1332,6 +1336,15 @@ func TestLWSAPIBasic(t *testing.T) {
13321336
t.Log("LWS API basic test passed successfully")
13331337
}
13341338

1339+
// TestModelServingPartitionBoundaryProtection verifies the protective effect of partition
1340+
// boundaries during rolling updates. It creates a ModelServing with partition=3, triggers
1341+
// a template change, and then verifies:
1342+
// - Status.CurrentRevision remains the old revision (protecting ordinals 0,1,2)
1343+
// - Status.UpdateRevision is set to a new revision
1344+
// - Pods with ordinal < partition carry CurrentRevision and old image
1345+
// - Pods with ordinal >= partition carry UpdateRevision and new image
1346+
//
1347+
// This is the E2E counterpart of TestModelServingVersionControl from the unit tests.
13351348
func TestModelServingPartitionBoundaryProtection(t *testing.T) {
13361349
ctx, kthenaClient, kubeClient := setupControllerManagerE2ETest(t)
13371350

@@ -1482,7 +1495,16 @@ func TestModelServingPartitionBoundaryProtection(t *testing.T) {
14821495
t.Log("ModelServing partition boundary protection test passed successfully")
14831496
}
14841497

1485-
1498+
// TestModelServingPartitionDeletedGroupHistoricalRevision verifies that when a pod
1499+
// (or ServingGroup) within the partition-protected range is deleted after a template
1500+
// change, the controller rebuilds it using the historical revision (CurrentRevision),
1501+
// NOT the new UpdateRevision.
1502+
//
1503+
// Scenario: partition=3, 5 replicas with updated template. Delete R-1 pod (ordinal < partition).
1504+
// Expected: R-1 is rebuilt using CurrentRevision (old image), not UpdateRevision (new image).
1505+
//
1506+
// This is the E2E counterpart of the "partition=2, recreate protected group should use
1507+
// historical revision" test case from TestModelServingVersionControl.
14861508
func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
14871509
ctx, kthenaClient, kubeClient := setupControllerManagerE2ETest(t)
14881510

@@ -1547,6 +1569,18 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
15471569

15481570
utils.WaitForModelServingReady(t, ctx, kthenaClient, testNamespace, modelServing.Name)
15491571

1572+
// Verify revision status is in partitioned state before deleting a protected group.
1573+
require.Eventually(t, func() bool {
1574+
ms, err := kthenaClient.WorkloadV1alpha1().ModelServings(testNamespace).Get(ctx, modelServing.Name, metav1.GetOptions{})
1575+
if err != nil {
1576+
return false
1577+
}
1578+
return ms.Status.CurrentRevision == initialCurrentRevision &&
1579+
ms.Status.UpdateRevision != "" &&
1580+
ms.Status.UpdateRevision != initialCurrentRevision &&
1581+
ms.Status.UpdatedReplicas == (replicas-partition)
1582+
}, 3*time.Minute, 5*time.Second, "ModelServing revision status did not converge to expected partitioned state")
1583+
15501584
// Verify the partitioned state is established: R-0, R-1, R-2 have the old image; R-3, R-4 have the new image
15511585
labelSelector := modelServingLabelSelector(modelServing.Name)
15521586
require.Eventually(t, func() bool {
@@ -1638,8 +1672,12 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
16381672
t.Logf("Recreated pod %s (ordinal 1): revision=%s, image=%s", pod.Name, podRevision, containerImage)
16391673

16401674
// The recreated pod should use the historical revision
1641-
ms, _ := kthenaClient.WorkloadV1alpha1().ModelServings(testNamespace).Get(ctx, modelServing.Name, metav1.GetOptions{})
1642-
if ms != nil && podRevision == ms.Status.CurrentRevision && containerImage == nginxImage {
1675+
ms, err := kthenaClient.WorkloadV1alpha1().ModelServings(testNamespace).Get(ctx, modelServing.Name, metav1.GetOptions{})
1676+
if err != nil {
1677+
t.Logf("Failed to get ModelServing while verifying recreated pod: %v", err)
1678+
return false
1679+
}
1680+
if podRevision == ms.Status.CurrentRevision && containerImage == nginxImage {
16431681
return true
16441682
}
16451683
return false
@@ -1675,7 +1713,13 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
16751713
t.Log("ModelServing partition deleted group historical revision test passed successfully")
16761714
}
16771715

1678-
1716+
// TestModelServingNoPartitionRollingUpdate verifies that when no partition is set
1717+
// (partition=nil), a rolling update behaves consistently with existing behavior:
1718+
// all replicas are updated to the new revision and the new image.
1719+
// After the update, CurrentRevision and UpdateRevision should converge.
1720+
//
1721+
// This is the E2E counterpart of the "no partition, recreated group should use
1722+
// new revision" test case from TestModelServingVersionControl.
16791723
func TestModelServingNoPartitionRollingUpdate(t *testing.T) {
16801724
ctx, kthenaClient, kubeClient := setupControllerManagerE2ETest(t)
16811725

@@ -1785,6 +1829,35 @@ func TestModelServingNoPartitionRollingUpdate(t *testing.T) {
17851829
t.Logf("Final CurrentRevision: %s, UpdateRevision: %s, UpdatedReplicas: %d",
17861830
finalMS.Status.CurrentRevision, finalMS.Status.UpdateRevision, finalMS.Status.UpdatedReplicas)
17871831

1832+
// Verify all running pods converged to the final revision and image.
1833+
require.Eventually(t, func() bool {
1834+
pods, err := kubeClient.CoreV1().Pods(testNamespace).List(ctx, metav1.ListOptions{
1835+
LabelSelector: labelSelector,
1836+
})
1837+
if err != nil || len(pods.Items) == 0 {
1838+
return false
1839+
}
1840+
1841+
runningCount := 0
1842+
for _, pod := range pods.Items {
1843+
if pod.DeletionTimestamp != nil {
1844+
continue
1845+
}
1846+
if pod.Status.Phase != corev1.PodRunning {
1847+
return false
1848+
}
1849+
runningCount++
1850+
1851+
containerImage := getPodContainerImage(pod, "test-container")
1852+
podRevision := pod.Labels["modelserving.volcano.sh/revision"]
1853+
if containerImage != "nginx:alpine" || podRevision != finalMS.Status.CurrentRevision {
1854+
return false
1855+
}
1856+
}
1857+
1858+
return runningCount == int(replicas)
1859+
}, 3*time.Minute, 5*time.Second, "Pods did not converge to final revision/image")
1860+
17881861
assert.Equal(t, finalMS.Status.CurrentRevision, finalMS.Status.UpdateRevision,
17891862
"Without partition, CurrentRevision and UpdateRevision should converge after full update")
17901863
assert.Equal(t, replicas, finalMS.Status.UpdatedReplicas,

0 commit comments

Comments
 (0)