@@ -957,7 +957,7 @@ func waitForPodsByLabel(
957957 return false
958958 }
959959 return verify (pods .Items )
960- }, timeout , 5 * time .Second , failureMsg )
960+ }, timeout , 1 * time .Second , failureMsg )
961961}
962962
963963// createRole is a helper function to create a Role with specified replicas and workers
@@ -1391,13 +1391,11 @@ func TestModelServingPartitionBoundaryProtection(t *testing.T) {
13911391
13921392 // Verify Status.CurrentRevision and Status.UpdateRevision.
13931393 t .Log ("Verifying Status.CurrentRevision and Status.UpdateRevision after partitioned update" )
1394- var finalMS * workload.ModelServing
13951394 require .Eventually (t , func () bool {
13961395 ms , err := kthenaClient .WorkloadV1alpha1 ().ModelServings (testNamespace ).Get (ctx , modelServing .Name , metav1.GetOptions {})
13971396 if err != nil {
13981397 return false
13991398 }
1400- finalMS = ms
14011399
14021400 t .Logf ("CurrentRevision: %s, UpdateRevision: %s, UpdatedReplicas: %d" ,
14031401 ms .Status .CurrentRevision , ms .Status .UpdateRevision , ms .Status .UpdatedReplicas )
@@ -1414,51 +1412,11 @@ func TestModelServingPartitionBoundaryProtection(t *testing.T) {
14141412 return ms .Status .UpdatedReplicas == (replicas - partition )
14151413 }, 3 * time .Minute , 5 * time .Second , "Revision status fields incorrect after partitioned update" )
14161414
1417- assert .Equal (t , initialCurrentRevision , finalMS .Status .CurrentRevision ,
1418- "CurrentRevision should remain the initial revision" )
1419- assert .NotEqual (t , finalMS .Status .CurrentRevision , finalMS .Status .UpdateRevision ,
1420- "CurrentRevision and UpdateRevision should differ during partitioned update" )
1421-
14221415 // Verify per-ordinal revision labels and images.
14231416 t .Log ("Verifying per-ordinal revisions and images" )
14241417 labelSelector := modelServingLabelSelector (modelServing .Name )
14251418 waitForPodsByLabel (t , ctx , kubeClient , labelSelector , 3 * time .Minute , func (pods []corev1.Pod ) bool {
1426- protectedCorrect := 0
1427- updatedCorrect := 0
1428-
1429- for _ , pod := range pods {
1430- if pod .DeletionTimestamp != nil || pod .Status .Phase != corev1 .PodRunning {
1431- continue
1432- }
1433-
1434- groupName := pod .Labels ["modelserving.volcano.sh/group-name" ]
1435- _ , ordinal := getGroupOrdinal (groupName )
1436- if ordinal < 0 {
1437- continue
1438- }
1439-
1440- podRevision := pod .Labels ["modelserving.volcano.sh/revision" ]
1441- containerImage := getPodContainerImage (pod , "test-container" )
1442-
1443- if ordinal < int (partition ) {
1444- // Protected ordinals: revision = CurrentRevision, image = old
1445- if podRevision == finalMS .Status .CurrentRevision && containerImage == nginxImage {
1446- protectedCorrect ++
1447- } else {
1448- t .Logf ("Protected pod %s (ordinal %d): revision=%s (want %s), image=%s (want %s)" ,
1449- pod .Name , ordinal , podRevision , finalMS .Status .CurrentRevision , containerImage , nginxImage )
1450- }
1451- } else {
1452- // Updated ordinals: revision = UpdateRevision, image = new
1453- if podRevision == finalMS .Status .UpdateRevision && containerImage == "nginx:alpine" {
1454- updatedCorrect ++
1455- } else {
1456- t .Logf ("Updated pod %s (ordinal %d): revision=%s (want %s), image=%s (want nginx:alpine)" ,
1457- pod .Name , ordinal , podRevision , finalMS .Status .UpdateRevision , containerImage )
1458- }
1459- }
1460- }
1461-
1419+ protectedCorrect , updatedCorrect := verifyPartitionState (t , pods , int (partition ), nginxImage , "nginx:alpine" )
14621420 t .Logf ("Protected correct: %d/3, Updated correct: %d/2" , protectedCorrect , updatedCorrect )
14631421 return protectedCorrect == 3 && updatedCorrect == 2
14641422 }, "Per-ordinal revision/image verification failed" )
@@ -1511,20 +1469,7 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
15111469 // Verify the partitioned state is established: R-0,R-1,R-2 have old image, R-3,R-4 have new
15121470 labelSelector := modelServingLabelSelector (modelServing .Name )
15131471 waitForPodsByLabel (t , ctx , kubeClient , labelSelector , 3 * time .Minute , func (pods []corev1.Pod ) bool {
1514- protectedOld , updatedNew := 0 , 0
1515- for _ , pod := range pods {
1516- if pod .DeletionTimestamp != nil {
1517- continue
1518- }
1519- groupName := pod .Labels ["modelserving.volcano.sh/group-name" ]
1520- _ , ordinal := getGroupOrdinal (groupName )
1521- image := getPodContainerImage (pod , "test-container" )
1522- if ordinal < int (partition ) && image == nginxImage {
1523- protectedOld ++
1524- } else if ordinal >= int (partition ) && image == "nginx:alpine" {
1525- updatedNew ++
1526- }
1527- }
1472+ protectedOld , updatedNew := verifyPartitionState (t , pods , int (partition ), nginxImage , "nginx:alpine" )
15281473 return protectedOld == 3 && updatedNew == 2
15291474 }, "Failed to reach partitioned state" )
15301475
@@ -1614,30 +1559,17 @@ func TestModelServingPartitionDeletedGroupHistoricalRevision(t *testing.T) {
16141559 // Also verify the overall state is still correct: 3 protected + 2 updated
16151560 t .Log ("Verifying overall partition state is preserved after pod recreation" )
16161561 waitForPodsByLabel (t , ctx , kubeClient , labelSelector , 3 * time .Minute , func (pods []corev1.Pod ) bool {
1617- protectedOld , updatedNew := 0 , 0
1618- for _ , pod := range pods {
1619- if pod .DeletionTimestamp != nil || pod .Status .Phase != corev1 .PodRunning {
1620- continue
1621- }
1622- groupName := pod .Labels ["modelserving.volcano.sh/group-name" ]
1623- _ , ordinal := getGroupOrdinal (groupName )
1624- image := getPodContainerImage (pod , "test-container" )
1625- if ordinal >= 0 && ordinal < int (partition ) && image == nginxImage {
1626- protectedOld ++
1627- } else if ordinal >= int (partition ) && image == "nginx:alpine" {
1628- updatedNew ++
1629- }
1630- }
1562+ protectedOld , updatedNew := verifyPartitionState (t , pods , int (partition ), nginxImage , "nginx:alpine" )
16311563 t .Logf ("Protected with old image: %d/3, Updated with new image: %d/2" , protectedOld , updatedNew )
16321564 return protectedOld == 3 && updatedNew == 2
16331565 }, "Overall partition state broken after pod recreation" )
16341566
16351567 t .Log ("ModelServing partition deleted group historical revision test passed successfully" )
16361568}
16371569
1638- // TestModelServingNoPartitionRollingUpdate verifies default rolling-update behavior
1570+ // TestModelServingRollingUpdate verifies default rolling-update behavior
16391571// when partition is nil: all replicas move to the new revision and image.
1640- func TestModelServingNoPartitionRollingUpdate (t * testing.T ) {
1572+ func TestModelServingRollingUpdate (t * testing.T ) {
16411573 ctx , kthenaClient , kubeClient := setupControllerManagerE2ETest (t )
16421574
16431575 // Create a ModelServing with 4 replicas and NO partition (default behavior)
@@ -1738,7 +1670,7 @@ func TestModelServingNoPartitionRollingUpdate(t *testing.T) {
17381670 assert .Equal (t , replicas , finalMS .Status .UpdatedReplicas ,
17391671 "All replicas should be updated when no partition is set" )
17401672
1741- t .Log ("ModelServing no-partition rolling update test passed successfully" )
1673+ t .Log ("ModelServing rolling update test passed successfully" )
17421674}
17431675
17441676// getGroupOrdinal extracts the ordinal from a ServingGroup name (e.g., "test-ms-3" -> 3).
@@ -1758,6 +1690,30 @@ func getGroupOrdinal(groupName string) (string, int) {
17581690 return parent , ordinal
17591691}
17601692
1693+ // verifyPartitionState checks that pods below partition have oldImage and pods at/above partition have newImage.
1694+ // Returns (protectedCount, updatedCount) of correctly configured running pods.
1695+ func verifyPartitionState (t * testing.T , pods []corev1.Pod , partition int , oldImage , newImage string ) (int , int ) {
1696+ t .Helper ()
1697+ protectedCorrect , updatedCorrect := 0 , 0
1698+ for _ , pod := range pods {
1699+ if pod .DeletionTimestamp != nil || pod .Status .Phase != corev1 .PodRunning {
1700+ continue
1701+ }
1702+ groupName := pod .Labels ["modelserving.volcano.sh/group-name" ]
1703+ _ , ordinal := getGroupOrdinal (groupName )
1704+ if ordinal < 0 {
1705+ continue
1706+ }
1707+ image := getPodContainerImage (pod , "test-container" )
1708+ if ordinal < partition && image == oldImage {
1709+ protectedCorrect ++
1710+ } else if ordinal >= partition && image == newImage {
1711+ updatedCorrect ++
1712+ }
1713+ }
1714+ return protectedCorrect , updatedCorrect
1715+ }
1716+
17611717func createRollingUpdateModelServing (name string , servingGroupReplicas int32 , partition * int32 ) * workload.ModelServing {
17621718 roleReplicas := int32 (1 )
17631719 rollingUpdateConfig := & workload.RollingUpdateConfiguration {
@@ -1835,8 +1791,6 @@ func TestModelServingControllerManagerRestart(t *testing.T) {
18351791 _ = kthenaClient .WorkloadV1alpha1 ().ModelServings (testNamespace ).Delete (cleanupCtx , modelServing .Name , metav1.DeleteOptions {})
18361792 })
18371793
1838- // ModelServing Partition Revision Control
1839-
18401794 // Wait briefly for initial reconciliation to start
18411795 t .Log ("Waiting for initial reconciliation to start..." )
18421796 // Wait for a random duration between 0 and 3 seconds (in 100ms increments)
0 commit comments