@@ -1199,7 +1199,11 @@ func TestModelServingRollingUpdateMaxUnavailableWithBadImage(t *testing.T) {
11991199 t .Log ("ModelServing rolling update maxUnavailable with bad image test passed successfully" )
12001200}
12011201
1202-
1202+ // TestLWSAPIBasic tests that kthena processes the LWS API correctly by:
1203+ // 1. Creating a simple LWS instance
1204+ // 2. Verifying corresponding ModelServing is created with proper owner references
1205+ // 3. Verifying pods are created automatically
1206+ // 4. Deleting LWS and verifying all resources are cleaned up
12031207func TestLWSAPIBasic (t * testing.T ) {
12041208 ctx , kthenaClient , _ := setupControllerManagerE2ETest (t )
12051209
@@ -1332,6 +1336,15 @@ func TestLWSAPIBasic(t *testing.T) {
13321336 t .Log ("LWS API basic test passed successfully" )
13331337}
13341338
1339+ // TestModelServingPartitionBoundaryProtection verifies the protective effect of partition
1340+ // boundaries during rolling updates. It creates a ModelServing with partition=3, triggers
1341+ // a template change, and then verifies:
1342+ // - Status.CurrentRevision remains the old revision (protecting ordinals 0,1,2)
1343+ // - Status.UpdateRevision is set to a new revision
1344+ // - Pods with ordinal < partition carry CurrentRevision and old image
1345+ // - Pods with ordinal >= partition carry UpdateRevision and new image
1346+ //
1347+ // This is the E2E counterpart of TestModelServingVersionControl from the unit tests.
13351348func TestModelServingPartitionBoundaryProtection (t * testing.T ) {
13361349 ctx , kthenaClient , kubeClient := setupControllerManagerE2ETest (t )
13371350
@@ -1482,7 +1495,16 @@ func TestModelServingPartitionBoundaryProtection(t *testing.T) {
14821495 t .Log ("ModelServing partition boundary protection test passed successfully" )
14831496}
14841497
1485-
1498+ // TestModelServingPartitionDeletedGroupHistoricalRevision verifies that when a pod
1499+ // (or ServingGroup) within the partition-protected range is deleted after a template
1500+ // change, the controller rebuilds it using the historical revision (CurrentRevision),
1501+ // NOT the new UpdateRevision.
1502+ //
1503+ // Scenario: partition=3, 5 replicas with updated template. Delete R-1 pod (ordinal < partition).
1504+ // Expected: R-1 is rebuilt using CurrentRevision (old image), not UpdateRevision (new image).
1505+ //
1506+ // This is the E2E counterpart of the "partition=2, recreate protected group should use
1507+ // historical revision" test case from TestModelServingVersionControl.
14861508func TestModelServingPartitionDeletedGroupHistoricalRevision (t * testing.T ) {
14871509 ctx , kthenaClient , kubeClient := setupControllerManagerE2ETest (t )
14881510
@@ -1547,6 +1569,18 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
15471569
15481570 utils .WaitForModelServingReady (t , ctx , kthenaClient , testNamespace , modelServing .Name )
15491571
1572+ // Verify revision status is in partitioned state before deleting a protected group.
1573+ require .Eventually (t , func () bool {
1574+ ms , err := kthenaClient .WorkloadV1alpha1 ().ModelServings (testNamespace ).Get (ctx , modelServing .Name , metav1.GetOptions {})
1575+ if err != nil {
1576+ return false
1577+ }
1578+ return ms .Status .CurrentRevision == initialCurrentRevision &&
1579+ ms .Status .UpdateRevision != "" &&
1580+ ms .Status .UpdateRevision != initialCurrentRevision &&
1581+ ms .Status .UpdatedReplicas == (replicas - partition )
1582+ }, 3 * time .Minute , 5 * time .Second , "ModelServing revision status did not converge to expected partitioned state" )
1583+
15501584 // Verify the partitioned state is established: R-0,R-1,R-2 have old image, R-3,R-4 have new
15511585 labelSelector := modelServingLabelSelector (modelServing .Name )
15521586 require .Eventually (t , func () bool {
@@ -1638,8 +1672,12 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
16381672 t .Logf ("Recreated pod %s (ordinal 1): revision=%s, image=%s" , pod .Name , podRevision , containerImage )
16391673
16401674 // The recreated pod should use the historical revision
1641- ms , _ := kthenaClient .WorkloadV1alpha1 ().ModelServings (testNamespace ).Get (ctx , modelServing .Name , metav1.GetOptions {})
1642- if ms != nil && podRevision == ms .Status .CurrentRevision && containerImage == nginxImage {
1675+ ms , err := kthenaClient .WorkloadV1alpha1 ().ModelServings (testNamespace ).Get (ctx , modelServing .Name , metav1.GetOptions {})
1676+ if err != nil {
1677+ t .Logf ("Failed to get ModelServing while verifying recreated pod: %v" , err )
1678+ return false
1679+ }
1680+ if podRevision == ms .Status .CurrentRevision && containerImage == nginxImage {
16431681 return true
16441682 }
16451683 return false
@@ -1675,7 +1713,13 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
16751713 t .Log ("ModelServing partition deleted group historical revision test passed successfully" )
16761714}
16771715
1678-
1716+ // TestModelServingNoPartitionRollingUpdate verifies that when no partition is set
1717+ // (partition=nil), a rolling update keeps its existing (unpartitioned) behavior:
1718+ // all replicas are updated to the new revision and the new image.
1719+ // After the update, CurrentRevision and UpdateRevision should converge.
1720+ //
1721+ // This is the E2E counterpart of the "no partition, recreated group should use
1722+ // new revision" test case from TestModelServingVersionControl.
16791723func TestModelServingNoPartitionRollingUpdate (t * testing.T ) {
16801724 ctx , kthenaClient , kubeClient := setupControllerManagerE2ETest (t )
16811725
@@ -1785,6 +1829,35 @@ func TestModelServingNoPartitionRollingUpdate(t *testing.T) {
17851829 t .Logf ("Final CurrentRevision: %s, UpdateRevision: %s, UpdatedReplicas: %d" ,
17861830 finalMS .Status .CurrentRevision , finalMS .Status .UpdateRevision , finalMS .Status .UpdatedReplicas )
17871831
1832+ // Verify all running pods converged to the final revision and image.
1833+ require .Eventually (t , func () bool {
1834+ pods , err := kubeClient .CoreV1 ().Pods (testNamespace ).List (ctx , metav1.ListOptions {
1835+ LabelSelector : labelSelector ,
1836+ })
1837+ if err != nil || len (pods .Items ) == 0 {
1838+ return false
1839+ }
1840+
1841+ runningCount := 0
1842+ for _ , pod := range pods .Items {
1843+ if pod .DeletionTimestamp != nil {
1844+ continue
1845+ }
1846+ if pod .Status .Phase != corev1 .PodRunning {
1847+ return false
1848+ }
1849+ runningCount ++
1850+
1851+ containerImage := getPodContainerImage (pod , "test-container" )
1852+ podRevision := pod .Labels ["modelserving.volcano.sh/revision" ]
1853+ if containerImage != "nginx:alpine" || podRevision != finalMS .Status .CurrentRevision {
1854+ return false
1855+ }
1856+ }
1857+
1858+ return runningCount == int (replicas )
1859+ }, 3 * time .Minute , 5 * time .Second , "Pods did not converge to final revision/image" )
1860+
17881861 assert .Equal (t , finalMS .Status .CurrentRevision , finalMS .Status .UpdateRevision ,
17891862 "Without partition, CurrentRevision and UpdateRevision should converge after full update" )
17901863 assert .Equal (t , replicas , finalMS .Status .UpdatedReplicas ,
0 commit comments