
Commit 8e10363

refactor
1 parent a4523ec commit 8e10363

2 files changed (+112, -95 lines)

scheduler/reconciler/reconcile_node.go

Lines changed: 4 additions & 87 deletions
@@ -62,17 +62,11 @@ func (nr *NodeReconciler) Compute(
 	compatHadExistingDeployment := nr.DeploymentCurrent != nil
 
 	result := new(NodeReconcileResult)
-	var deploymentComplete bool
 	for nodeID, allocs := range nodeAllocs {
-		diff, deploymentCompleteForNode := nr.computeForNode(job, nodeID, eligibleNodes,
+		diff := nr.computeForNode(job, nodeID, eligibleNodes,
 			notReadyNodes, taintedNodes, required, allocs, terminal,
 			serverSupportsDisconnectedClients)
 		result.Append(diff)
-
-		deploymentComplete = deploymentCompleteForNode
-		if deploymentComplete {
-			break
-		}
 	}
 
 	// COMPAT(1.14.0) prevent a new deployment from being created in the case
@@ -83,8 +77,6 @@ func (nr *NodeReconciler) Compute(
 		nr.DeploymentCurrent = nil
 	}
 
-	nr.DeploymentUpdates = append(nr.DeploymentUpdates, nr.setDeploymentStatusAndUpdates(deploymentComplete, job)...)
-
 	return result
 }
 
@@ -102,8 +94,7 @@ func (nr *NodeReconciler) Compute(
 // 8. those that may still be running on a node that has resumed reconnected.
 //
 // This method mutates the NodeReconciler fields, and returns a new
-// NodeReconcilerResult object and a boolean to indicate wither the deployment
-// is complete or not.
+// NodeReconcilerResult object.
 func (nr *NodeReconciler) computeForNode(
 	job *structs.Job, // job whose allocs are going to be diff-ed
 	nodeID string,
@@ -114,7 +105,7 @@ func (nr *NodeReconciler) computeForNode(
 	liveAllocs []*structs.Allocation, // non-terminal allocations that exist
 	terminal structs.TerminalByNodeByName, // latest terminal allocations (by node, id)
 	serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic
-) (*NodeReconcileResult, bool) {
+) *NodeReconcileResult {
 	result := new(NodeReconcileResult)
 
 	// cancel deployments that aren't needed anymore
@@ -322,10 +313,6 @@ func (nr *NodeReconciler) computeForNode(
 		})
 	}
 
-	// as we iterate over require groups, we'll keep track of whether the
-	// deployment is complete or not
-	deploymentComplete := false
-
 	// Scan the required groups
 	for name, tg := range required {
 
@@ -343,7 +330,6 @@ func (nr *NodeReconciler) computeForNode(
 			dstate.AutoPromote = tg.Update.AutoPromote
 			dstate.ProgressDeadline = tg.Update.ProgressDeadline
 		}
-		dstate.DesiredTotal = len(eligibleNodes)
 	}
 
 	// Check for an existing allocation
@@ -405,7 +391,6 @@ func (nr *NodeReconciler) computeForNode(
 
 		// check if deployment is place ready or complete
 		deploymentPlaceReady := !deploymentPaused && !deploymentFailed
-		deploymentComplete = nr.isDeploymentComplete(tg.Name, result, isCanarying[tg.Name])
 
 		// check if perhaps there's nothing else to do for this TG
 		if existingDeployment ||
@@ -426,7 +411,7 @@ func (nr *NodeReconciler) computeForNode(
 		}
 	}
 
-	return result, deploymentComplete
+	return result
 }
 
 func (nr *NodeReconciler) createDeployment(job *structs.Job, tg *structs.TaskGroup,
@@ -485,74 +470,6 @@ func (nr *NodeReconciler) createDeployment(job *structs.Job, tg *structs.TaskGro
 	nr.DeploymentCurrent.TaskGroups[tg.Name] = dstate
 }
 
-func (nr *NodeReconciler) isDeploymentComplete(groupName string, buckets *NodeReconcileResult, isCanarying bool) bool {
-	complete := len(buckets.Place)+len(buckets.Migrate)+len(buckets.Update) == 0
-
-	if !complete || nr.DeploymentCurrent == nil || isCanarying {
-		return false
-	}
-
-	// ensure everything is healthy
-	if dstate, ok := nr.DeploymentCurrent.TaskGroups[groupName]; ok {
-		if dstate.HealthyAllocs < dstate.DesiredTotal { // Make sure we have enough healthy allocs
-			complete = false
-		}
-	}
-
-	return complete
-}
-
-func (nr *NodeReconciler) setDeploymentStatusAndUpdates(deploymentComplete bool, job *structs.Job) []*structs.DeploymentStatusUpdate {
-	statusUpdates := []*structs.DeploymentStatusUpdate{}
-
-	if d := nr.DeploymentCurrent; d != nil {
-
-		// Deployments that require promotion should have appropriate status set
-		// immediately, no matter their completness.
-		if d.RequiresPromotion() {
-			if d.HasAutoPromote() {
-				d.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion
-			} else {
-				d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
-			}
-			return statusUpdates
-		}
-
-		// Mark the deployment as complete if possible
-		if deploymentComplete {
-			if job.IsMultiregion() {
-				// the unblocking/successful states come after blocked, so we
-				// need to make sure we don't revert those states
-				if d.Status != structs.DeploymentStatusUnblocking &&
-					d.Status != structs.DeploymentStatusSuccessful {
-					statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
-						DeploymentID:      nr.DeploymentCurrent.ID,
-						Status:            structs.DeploymentStatusBlocked,
-						StatusDescription: structs.DeploymentStatusDescriptionBlocked,
-					})
-				}
-			} else {
-				statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
-					DeploymentID:      nr.DeploymentCurrent.ID,
-					Status:            structs.DeploymentStatusSuccessful,
-					StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
-				})
-			}
-		}
-
-		// Mark the deployment as pending since its state is now computed.
-		if d.Status == structs.DeploymentStatusInitializing {
-			statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
-				DeploymentID:      nr.DeploymentCurrent.ID,
-				Status:            structs.DeploymentStatusPending,
-				StatusDescription: structs.DeploymentStatusDescriptionPendingForPeer,
-			})
-		}
-	}
-
-	return statusUpdates
-}
-
 // materializeSystemTaskGroups is used to materialize all the task groups
 // a system or sysbatch job requires.
 func materializeSystemTaskGroups(job *structs.Job) map[string]*structs.TaskGroup {
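
The `isDeploymentComplete` and `setDeploymentStatusAndUpdates` helpers removed above reappear, nearly verbatim, on the `SystemScheduler` in the second file. For orientation only, here is a minimal, self-contained sketch of the completeness rule that moves; the type and function names below are simplified stand-ins, not the actual Nomad types.

```go
package main

import "fmt"

// Simplified stand-ins for the types in the diff above (hypothetical; the real
// fields live on structs.DeploymentState and NodeReconcileResult).
type groupState struct {
	HealthyAllocs int
	DesiredTotal  int
}

type pendingWork struct {
	Place, Migrate, Update int // counts of work still to be done
}

// groupComplete mirrors the moved isDeploymentComplete check: nothing left to
// place, migrate, or update; not mid-canary; and enough healthy allocations.
func groupComplete(w pendingWork, st *groupState, isCanarying bool) bool {
	complete := w.Place+w.Migrate+w.Update == 0
	if !complete || st == nil || isCanarying {
		return false
	}
	return st.HealthyAllocs >= st.DesiredTotal
}

func main() {
	st := &groupState{HealthyAllocs: 2, DesiredTotal: 3}
	fmt.Println(groupComplete(pendingWork{}, st, false)) // false: one alloc not yet healthy
	st.HealthyAllocs = 3
	fmt.Println(groupComplete(pendingWork{}, st, false)) // true
}
```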

scheduler/scheduler_system.go

Lines changed: 108 additions & 8 deletions
@@ -285,6 +285,38 @@ func (s *SystemScheduler) computeJobAllocs() error {
 		s.logger.Debug("reconciled current state with desired state", r.Fields()...)
 	}
 
+	// track if any of the task groups is doing a canary update now
+	isCanarying := map[string]bool{}
+	for _, tg := range s.job.TaskGroups {
+		if s.deployment == nil {
+			break
+		}
+
+		dstate, ok := s.deployment.TaskGroups[tg.Name]
+		if !ok {
+			continue
+		}
+
+		isCanarying[tg.Name] = !tg.Update.IsEmpty() && tg.Update.Canary > 0 && dstate != nil && !dstate.Promoted
+	}
+
+	// Initially, if the job requires canaries, we place all of them on all
+	// eligible nodes. At this point we know which nodes are feasible, so we
+	// evict unneeded canaries.
+	if err := s.evictUnneededCanaries(s.job, s.nodes, r); err != nil {
+		return fmt.Errorf("failed to evict canaries for job '%s': %v", s.eval.JobID, err)
+	}
+
+	// check if the deployment is complete
+	deploymentComplete := true
+	for _, tg := range s.job.TaskGroups {
+		groupComplete := s.isDeploymentComplete(tg.Name, r, isCanarying[tg.Name])
+		deploymentComplete = deploymentComplete && groupComplete
+	}
+
+	// adjust the deployment updates and set the right deployment status
+	nr.DeploymentUpdates = append(nr.DeploymentUpdates, s.setDeploymentStatusAndUpdates(deploymentComplete, s.job)...)
+
 	// Add the deployment changes to the plan
 	s.plan.Deployment = nr.DeploymentCurrent
 	s.plan.DeploymentUpdates = nr.DeploymentUpdates
@@ -336,13 +368,6 @@ func (s *SystemScheduler) computeJobAllocs() error {
 	// be limited by max_parallel
 	s.limitReached = evictAndPlace(s.ctx, s.job, r, sstructs.StatusAllocUpdating)
 
-	// Initially, if the job requires canaries, we place all of them on all
-	// eligible nodes. At this point we know which nodes are feasible, so we
-	// evict unnedded canaries.
-	if err := s.evictCanaries(s.job, s.nodes, r); err != nil {
-		return fmt.Errorf("failed to evict canaries for job '%s': %v", s.eval.JobID, err)
-	}
-
 	// Nothing remaining to do if placement is not required
 	if len(r.Place) == 0 {
 		if !s.job.Stopped() {
@@ -571,6 +596,13 @@ func (s *SystemScheduler) computePlacements(place []reconciler.AllocTuple, exist
 		}
 
 		s.plan.AppendAlloc(alloc, nil)
+
+		// we only know the total amount of placements once we filter out
+		// infeasible nodes, so for system jobs we do it backwards a bit: the
+		// "desired" total is the total we were able to place.
+		if s.deployment != nil {
+			s.deployment.TaskGroups[tgName].DesiredTotal += 1
+		}
 	}
 
 	return nil
@@ -657,7 +689,7 @@ func evictAndPlace(ctx feasible.Context, job *structs.Job, diff *reconciler.Node
 
 // evictAndPlaceCanaries checks how many canaries are needed against the amount
 // of feasible nodes, and evicts unnecessary placements.
-func (s *SystemScheduler) evictCanaries(job *structs.Job, readyNodes []*structs.Node,
+func (s *SystemScheduler) evictUnneededCanaries(job *structs.Job, readyNodes []*structs.Node,
 	reconcileResult *reconciler.NodeReconcileResult) error {
 
 	if job.Stopped() {
@@ -733,3 +765,71 @@ func (s *SystemScheduler) evictCanaries(job *structs.Job, readyNodes []*structs.
 
 	return nil
 }
+
+func (s *SystemScheduler) isDeploymentComplete(groupName string, buckets *reconciler.NodeReconcileResult, isCanarying bool) bool {
+	complete := len(buckets.Place)+len(buckets.Migrate)+len(buckets.Update) == 0
+
+	if !complete || s.deployment == nil || isCanarying {
+		return false
+	}
+
+	// ensure everything is healthy
+	if dstate, ok := s.deployment.TaskGroups[groupName]; ok {
+		if dstate.HealthyAllocs < dstate.DesiredTotal { // Make sure we have enough healthy allocs
+			complete = false
+		}
+	}
+
+	return complete
+}
+
+func (s *SystemScheduler) setDeploymentStatusAndUpdates(deploymentComplete bool, job *structs.Job) []*structs.DeploymentStatusUpdate {
+	statusUpdates := []*structs.DeploymentStatusUpdate{}
+
+	if d := s.deployment; d != nil {
+
+		// Deployments that require promotion should have appropriate status set
+		// immediately, no matter their completeness.
+		if d.RequiresPromotion() {
+			if d.HasAutoPromote() {
+				d.StatusDescription = structs.DeploymentStatusDescriptionRunningAutoPromotion
+			} else {
+				d.StatusDescription = structs.DeploymentStatusDescriptionRunningNeedsPromotion
+			}
+			return statusUpdates
+		}
+
+		// Mark the deployment as complete if possible
+		if deploymentComplete {
+			if job.IsMultiregion() {
+				// the unblocking/successful states come after blocked, so we
+				// need to make sure we don't revert those states
+				if d.Status != structs.DeploymentStatusUnblocking &&
+					d.Status != structs.DeploymentStatusSuccessful {
+					statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
+						DeploymentID:      s.deployment.ID,
+						Status:            structs.DeploymentStatusBlocked,
+						StatusDescription: structs.DeploymentStatusDescriptionBlocked,
+					})
+				}
+			} else {
+				statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
+					DeploymentID:      s.deployment.ID,
+					Status:            structs.DeploymentStatusSuccessful,
+					StatusDescription: structs.DeploymentStatusDescriptionSuccessful,
+				})
+			}
+		}
+
+		// Mark the deployment as pending since its state is now computed.
+		if d.Status == structs.DeploymentStatusInitializing {
+			statusUpdates = append(statusUpdates, &structs.DeploymentStatusUpdate{
+				DeploymentID:      s.deployment.ID,
+				Status:            structs.DeploymentStatusPending,
+				StatusDescription: structs.DeploymentStatusDescriptionPendingForPeer,
+			})
+		}
+	}
+
+	return statusUpdates
+}
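
Taken together, the change leaves deployment-completeness and status handling to the system scheduler: after the per-node reconcile, `computeJobAllocs` detects canarying groups, evicts unneeded canaries, checks each group for completeness, and only treats the whole deployment as complete when every group is. Below is a condensed, hypothetical sketch of that aggregation step, using simplified names rather than the real scheduler types.

```go
package main

import "fmt"

// deploymentComplete mirrors the aggregation loop added to computeJobAllocs:
// the deployment is complete only when every task group reports complete.
// groupDone is a stand-in for s.isDeploymentComplete.
func deploymentComplete(groups []string, groupDone func(name string) bool) bool {
	complete := true // a single incomplete group flips this to false
	for _, name := range groups {
		complete = complete && groupDone(name)
	}
	return complete
}

func main() {
	done := map[string]bool{"web": true, "cache": false}
	check := func(name string) bool { return done[name] }

	fmt.Println(deploymentComplete([]string{"web", "cache"}, check)) // false
	done["cache"] = true
	fmt.Println(deploymentComplete([]string{"web", "cache"}, check)) // true
}
```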
