From 202cb10b662de6d69212132dbbceb70d8a660da6 Mon Sep 17 00:00:00 2001
From: Manikandan R
Date: Fri, 22 Aug 2025 15:52:55 +0530
Subject: [PATCH] [YUNIKORN-3113] Resource-wise preemption

---
 pkg/common/resources/resources.go        |  23 +++
 pkg/common/resources/resources_test.go   |  27 +++
 pkg/scheduler/objects/preemption.go      |  76 +++++--
 pkg/scheduler/objects/preemption_test.go | 246 ++++++++++++++++++++++-
 4 files changed, 352 insertions(+), 20 deletions(-)

diff --git a/pkg/common/resources/resources.go b/pkg/common/resources/resources.go
index 04a956c10..780bd6a08 100644
--- a/pkg/common/resources/resources.go
+++ b/pkg/common/resources/resources.go
@@ -1156,3 +1156,26 @@ func (r *Resource) DominantResourceType(capacity *Resource) string {
 	}
 	return dominant
 }
+
+// ExtractLatestIfModified extracts the latest value for every resource type whose value has been modified.
+// If either resource is nil, return a nil resource.
+// If no resource type matches, return a nil resource.
+// If resource types match but none has been modified, return a nil resource.
+func ExtractLatestIfModified(old *Resource, new *Resource) *Resource {
+	if old == nil || new == nil {
+		return nil
+	}
+	latest := NewResource()
+	for k, v := range old.Resources {
+		if newValue, exists := new.Resources[k]; exists {
+			if v != newValue {
+				latest.Resources[k] = newValue
+			}
+		}
+	}
+	if len(latest.Resources) > 0 {
+		return latest
+	} else {
+		return nil
+	}
+}
diff --git a/pkg/common/resources/resources_test.go b/pkg/common/resources/resources_test.go
index d9766ea77..425632c73 100644
--- a/pkg/common/resources/resources_test.go
+++ b/pkg/common/resources/resources_test.go
@@ -2332,3 +2332,30 @@ func TestResource_Prune(t *testing.T) {
 		})
 	}
 }
+
+func TestResource_ExtractLatestIfModified(t *testing.T) {
+	var tests = []struct {
+		caseName string
+		old      *Resource
+		new      *Resource
+		latest   *Resource
+	}{
+		{"nil case", nil, nil, nil},
+		{"nil with non empty", nil, NewResourceFromMap(map[string]Quantity{"first": 1}), nil},
+		{"non empty case", NewResourceFromMap(map[string]Quantity{"first": 1}), NewResourceFromMap(map[string]Quantity{"first": 2}), NewResourceFromMap(map[string]Quantity{"first": 2})},
+		{"non empty case, reversal", NewResourceFromMap(map[string]Quantity{"first": 2}), NewResourceFromMap(map[string]Quantity{"first": 1}), NewResourceFromMap(map[string]Quantity{"first": 1})},
+		{"non empty case, only one exists with a different value", NewResourceFromMap(map[string]Quantity{"first": 1, "second": 2}), NewResourceFromMap(map[string]Quantity{"second": 3}), NewResourceFromMap(map[string]Quantity{"second": 3})},
+		{"non empty case, only one exists with the same value", NewResourceFromMap(map[string]Quantity{"first": 1, "second": 2}), NewResourceFromMap(map[string]Quantity{"second": 2}), nil},
+		{"non empty case, disjoint sets and type doesn't exist", NewResourceFromMap(map[string]Quantity{"first": 1}), NewResourceFromMap(map[string]Quantity{"second": 2}), nil},
+	}
+	for _, tt := range tests {
+		t.Run(tt.caseName, func(t *testing.T) {
+			latest := ExtractLatestIfModified(tt.old, tt.new)
+			if tt.latest == nil {
+				assert.Equal(t, tt.latest, latest)
+			} else {
+				assert.Assert(t, DeepEquals(tt.latest, latest), "resource type maps are not equal")
+			}
+		})
+	}
+}
diff --git a/pkg/scheduler/objects/preemption.go b/pkg/scheduler/objects/preemption.go
index 8751000fd..6eec4652f 100644
--- a/pkg/scheduler/objects/preemption.go
+++ b/pkg/scheduler/objects/preemption.go
@@ -219,6 +219,7 @@ func (p *Preemptor) checkPreemptionQueueGuarantees() bool {
 		return false
 	}
 
+	oldRemaining := currentQueue.GetRemainingGuaranteedResource()
 	currentQueue.AddAllocation(p.ask.GetAllocatedResource())
 
 	// remove each allocation in turn, validating that at some point we free enough resources to allow this ask to fit
@@ -226,7 +227,12 @@ func (p *Preemptor) checkPreemptionQueueGuarantees() bool {
 	for _, alloc := range snapshot.PotentialVictims {
 		snapshot.RemoveAllocation(alloc.GetAllocatedResource())
 		remaining := currentQueue.GetRemainingGuaranteedResource()
-		if remaining != nil && resources.StrictlyGreaterThanOrEquals(remaining, resources.Zero) {
+
+		// Net remaining guaranteed is simply the latest remaining guaranteed value after the victim has been
+		// removed from the ask queue. Where a resource type is defined in the ask queue's guaranteed resources
+		// but not used by the victim, it keeps only the RELEVANT types and drops those the victim never touches.
+		netRemaining := resources.ExtractLatestIfModified(oldRemaining, remaining)
+		if remaining != nil && resources.StrictlyGreaterThanOrEquals(netRemaining, resources.Zero) {
 			return true
 		}
 	}
@@ -272,21 +278,32 @@ func (p *Preemptor) calculateVictimsByNode(nodeAvailable *resources.Resource, po
 			queueSnapshot.RemoveAllocation(victim.GetAllocatedResource())
 			preemptableResource := queueSnapshot.GetPreemptableResource()
 
+			// Net remaining guaranteed is simply the latest remaining guaranteed value after the victim has been
+			// removed from the victim queue. Where a resource type is defined in the victim queue's guaranteed resources
+			// but not used by the victim, it keeps only the RELEVANT types and drops those the victim never touches.
+			netRemaining := resources.ExtractLatestIfModified(oldRemaining, queueSnapshot.GetRemainingGuaranteedResource())
+
 			// Did removing this allocation still keep the queue over-allocated?
 			// At times, over-allocation happens because of resource types in usage but not defined as guaranteed.
-			// So, as an additional check, -ve remaining guaranteed resource before removing the victim means
+			// So, as an additional check, a -ve or zero net remaining guaranteed resource means
 			// some really useful victim is there.
 			// In case of victims densely populated on any specific node, checking/honouring the guaranteed quota on ask or preemptor queue
 			// acts as early filtering layer to carry forward only the required victims.
 			// For other cases like victims spread over multiple nodes, this doesn't add great value.
 			if resources.StrictlyGreaterThanOrEquals(preemptableResource, resources.Zero) &&
-				(oldRemaining == nil || resources.StrictlyGreaterThan(resources.Zero, oldRemaining)) {
+				(netRemaining == nil || resources.StrictlyGreaterThanOrEquals(resources.Zero, netRemaining)) {
 				// add the current victim into the ask queue
+				askQueueOldRemaining := askQueue.GetRemainingGuaranteedResource()
 				askQueue.AddAllocation(victim.GetAllocatedResource())
 				askQueueNewRemaining := askQueue.GetRemainingGuaranteedResource()
+
+				// Net remaining guaranteed is simply the latest remaining guaranteed value after the victim has been
+				// added to the ask queue. Where a resource type is defined in the ask queue's guaranteed resources
+				// but not used by the victim, it keeps only the RELEVANT types and drops those the victim never touches.
+				askQueueNetRemaining := resources.ExtractLatestIfModified(askQueueOldRemaining, askQueueNewRemaining)
+
 				// Did adding this allocation make the ask queue over - utilized?
-				if askQueueNewRemaining != nil && resources.StrictlyGreaterThan(resources.Zero, askQueueNewRemaining) {
+				if askQueueNewRemaining != nil && resources.StrictlyGreaterThan(resources.Zero, askQueueNetRemaining) {
 					askQueue.RemoveAllocation(victim.GetAllocatedResource())
 					queueSnapshot.AddAllocation(victim.GetAllocatedResource())
 					break
@@ -344,13 +361,18 @@ func (p *Preemptor) calculateVictimsByNode(nodeAvailable *resources.Resource, po
 			queueSnapshot.RemoveAllocation(victim.GetAllocatedResource())
 			preemptableResource := queueSnapshot.GetPreemptableResource()
 
+			// Net remaining guaranteed is simply the latest remaining guaranteed value after the victim has been
+			// removed from the victim's queue. Where a resource type is defined in the victim queue's guaranteed resources
+			// but not used by the victim, it keeps only the RELEVANT types and drops those the victim never touches.
+			netRemaining := resources.ExtractLatestIfModified(oldRemaining, queueSnapshot.GetRemainingGuaranteedResource())
+
 			// Did removing this allocation still keep the queue over-allocated?
 			// At times, over-allocation happens because of resource types in usage but not defined as guaranteed.
-			// So, as an additional check, -ve remaining guaranteed resource before removing the victim means
+			// So, as an additional check, a -ve or zero net remaining guaranteed resource means
 			// some really useful victim is there.
 			// Similar checks could be added even on the ask or preemptor queue to prevent being over utilized.
 			if resources.StrictlyGreaterThanOrEquals(preemptableResource, resources.Zero) &&
-				(oldRemaining == nil || resources.StrictlyGreaterThan(resources.Zero, oldRemaining)) {
+				(netRemaining == nil || resources.StrictlyGreaterThanOrEquals(resources.Zero, netRemaining)) {
 				// removing task does not violate queue constraints, adjust queue and node
 				nodeCurrentAvailable.AddTo(victim.GetAllocatedResource())
 				// check if ask now fits and we haven't had this happen before
@@ -493,6 +515,8 @@ func (p *Preemptor) calculateAdditionalVictims(nodeVictims []*Allocation) ([]*Al
 		return compareAllocationLess(potentialVictims[i], potentialVictims[j])
 	})
 
+	askQueueRemaining := askQueue.GetRemainingGuaranteedResource()
+
 	// evaluate each potential victim in turn, stopping once sufficient resources have been freed
 	victims := make([]*Allocation, 0)
 	for _, victim := range potentialVictims {
@@ -502,20 +526,31 @@ func (p *Preemptor) calculateAdditionalVictims(nodeVictims []*Allocation) ([]*Al
 		oldRemaining := queueSnapshot.GetRemainingGuaranteedResource()
 		queueSnapshot.RemoveAllocation(victim.GetAllocatedResource())
 
+		// Net remaining guaranteed is simply the latest remaining guaranteed value after the victim has been
+		// removed from the victim's queue. Where a resource type is defined in the victim queue's guaranteed resources
+		// but not used by the victim, it keeps only the RELEVANT types and drops those the victim never touches.
+		netRemaining := resources.ExtractLatestIfModified(oldRemaining, queueSnapshot.GetRemainingGuaranteedResource())
+
 		// Did removing this allocation still keep the queue over-allocated?
 		// At times, over-allocation happens because of resource types in usage but not defined as guaranteed.
-		// So, as an additional check, -ve remaining guaranteed resource before removing the victim means
+		// So, as an additional check, a -ve or zero net remaining guaranteed resource means
 		// some really useful victim is there.
 		preemptableResource := queueSnapshot.GetPreemptableResource()
 		if resources.StrictlyGreaterThanOrEquals(preemptableResource, resources.Zero) &&
-			(oldRemaining == nil || resources.StrictlyGreaterThan(resources.Zero, oldRemaining)) {
+			(netRemaining == nil || resources.StrictlyGreaterThanOrEquals(resources.Zero, netRemaining)) {
 			askQueueRemainingAfterVictimRemoval := askQueue.GetRemainingGuaranteedResource()
 			// add the current victim into the ask queue
 			askQueue.AddAllocation(victim.GetAllocatedResource())
 			askQueueNewRemaining := askQueue.GetRemainingGuaranteedResource()
+
+			// Net remaining guaranteed is simply the latest remaining guaranteed value after the victim has been
+			// added to the ask queue. Where a resource type is defined in the ask queue's guaranteed resources
+			// but not used by the victim, it keeps only the RELEVANT types and drops those the victim never touches.
+			askQueueNetRemaining := resources.ExtractLatestIfModified(askQueueRemainingAfterVictimRemoval, askQueueNewRemaining)
+
 			// Did adding this allocation make the ask queue over - utilized?
-			if askQueueNewRemaining != nil && resources.StrictlyGreaterThan(resources.Zero, askQueueNewRemaining) {
+			if askQueueNewRemaining != nil && resources.StrictlyGreaterThan(resources.Zero, askQueueNetRemaining) {
 				askQueue.RemoveAllocation(victim.GetAllocatedResource())
 				queueSnapshot.AddAllocation(victim.GetAllocatedResource())
 				break
@@ -536,12 +571,22 @@ func (p *Preemptor) calculateAdditionalVictims(nodeVictims []*Allocation) ([]*Al
 			}
 		}
 	}
-	// At last, did the ask queue usage under or equals guaranteed quota?
-	finalRemainingRes := askQueue.GetRemainingGuaranteedResource()
-	if finalRemainingRes != nil && resources.StrictlyGreaterThanOrEquals(finalRemainingRes, resources.Zero) {
-		return victims, true
+
+	// At last, is the ask queue usage under or equal to the guaranteed quota after finding the additional victims?
+	if len(victims) > 0 {
+		finalRemainingRes := askQueue.GetRemainingGuaranteedResource()
+
+		// Net remaining guaranteed is simply the latest remaining guaranteed value after the victims have been
+		// added to the ask queue. Where a resource type is defined in the ask queue's guaranteed resources
+		// but not used by the victims, it keeps only the RELEVANT types and drops those the victims never touch.
+		netRemaining := resources.ExtractLatestIfModified(askQueueRemaining, finalRemainingRes)
+		if finalRemainingRes != nil && resources.StrictlyGreaterThanOrEquals(netRemaining, resources.Zero) {
+			return victims, true
+		} else {
+			return victims, false
+		}
 	}
-	return nil, false
+	return nil, true
 }
 
 // tryNodes attempts to find potential nodes for scheduling. For each node, potential victims are passed to
@@ -682,7 +727,8 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) {
 			log.Log(log.SchedPreemption).Info("Reserving node for ask after preemption",
 				zap.String("allocationKey", p.ask.GetAllocationKey()),
 				zap.String("nodeID", nodeID),
-				zap.Int("victimCount", len(victims)))
+				zap.Int("collectedVictimCount", len(victims)),
+				zap.Int("preemptedVictimCount", len(finalVictims)))
 			return newReservedAllocationResult(nodeID, p.ask), true
 		}
 
diff --git a/pkg/scheduler/objects/preemption_test.go b/pkg/scheduler/objects/preemption_test.go
index a88e1bdc4..0823dca1d 100644
--- a/pkg/scheduler/objects/preemption_test.go
+++ b/pkg/scheduler/objects/preemption_test.go
@@ -43,7 +43,17 @@ func creatApp1(
 	node2 *Node,
 	app1Rec map[string]resources.Quantity,
 ) (*Allocation, *Allocation, error) {
-	app1 := newApplication(appID1, "default", "root.parent.child1")
+	return creatApp1WithTwoDifferentAllocations(childQ1, node1, node2, app1Rec, app1Rec)
+}
+
+func creatApp1WithTwoDifferentAllocations(
+	childQ1 *Queue,
+	node1 *Node,
+	node2 *Node,
+	app1Rec map[string]resources.Quantity,
+	app2Rec map[string]resources.Quantity,
+) (*Allocation, *Allocation, error) {
+	app1 := newApplication(appID1, "default", childQ1.QueuePath)
 	app1.SetQueue(childQ1)
 	childQ1.applications[appID1] = app1
 
@@ -52,7 +62,7 @@ func creatApp1(
 	if err := app1.AddAllocationAsk(ask1); err != nil {
 		return nil, nil, err
 	}
-	ask2 := newAllocationAsk("alloc2", appID1, resources.NewResourceFromMap(app1Rec))
+	ask2 := newAllocationAsk("alloc2", appID1, resources.NewResourceFromMap(app2Rec))
 	ask2.createTime = time.Now()
 	if err := app1.AddAllocationAsk(ask2); err != nil {
 		return nil, nil, err
@@ -66,14 +76,14 @@ func creatApp1(
 	}
 	var alloc2 *Allocation
 	if node2 != nil {
-		alloc2 = newAllocationWithKey("alloc2", appID1, nodeID2, resources.NewResourceFromMap(app1Rec))
+		alloc2 = newAllocationWithKey("alloc2", appID1, nodeID2, resources.NewResourceFromMap(app2Rec))
 		alloc2.createTime = ask2.createTime
 		app1.AddAllocation(alloc2)
 		if !node2.TryAddAllocation(alloc2) {
 			return nil, nil, fmt.Errorf("node alloc2 failed")
 		}
 	} else {
-		alloc2 = newAllocationWithKey("alloc2", appID1, nodeID1, resources.NewResourceFromMap(app1Rec))
+		alloc2 = newAllocationWithKey("alloc2", appID1, nodeID1, resources.NewResourceFromMap(app2Rec))
 		alloc2.createTime = ask2.createTime
 		if !node1.TryAddAllocation(alloc2) {
 			return nil, nil, fmt.Errorf("node alloc2 failed")
@@ -95,7 +105,7 @@ func creatApp2(
 	app2Res map[string]resources.Quantity,
 	allocID string,
 ) (*Application, *Allocation, error) {
-	app2 := newApplication(appID2, "default", "root.parent.child2")
+	app2 := newApplication(appID2, "default", childQ2.QueuePath)
 	app2.SetQueue(childQ2)
 	childQ2.applications[appID2] = app2
 	ask3 := newAllocationAsk(allocID, appID2, resources.NewResourceFromMap(app2Res))
@@ -1831,3 +1841,229 @@ func TestTryPreemption_OnNode_UGParent_With_UGPreemptorChild_OGVictimChild_As_Si
 	assert.Check(t, !alloc1.IsPreempted(), "alloc1 preempted")
 	assert.Check(t, alloc2.IsPreempted(), "alloc2 not preempted")
 }
+
+// TestTryPreemption_VictimQueue_With_OG_And_UG_ResTypes Test try preemption with a 2-level queue hierarchy.
+// Guaranteed set on both the victim queue path and the preemptor queue path.
+// The victim queue uses only some of the resource types defined in its guaranteed resources.
+// The request (preemptor) resource types match the victim queue's used resource types, but not all of the resource types defined in its guaranteed resources.
+// Even though some resource types defined in the victim queue's guaranteed resources are unused and irrelevant to the preemptor ask's resource requirements, preemption should
+// be triggered, the victim queue should be considered a candidate, and the victims whose resource types match the ask's resource types should be killed.
+// Setup:
+// Nodes are Node1 & Node2. Each node has enough space to accommodate the new ask.
+// root.parent.child1: guaranteed set on root.parent.child1, first: 10, second: 2. 2 allocations (belonging to a single app) are running, each using first:10. Total usage is first:20.
+// root.parent.child2: guaranteed set on root.parent.child2, first: 10. A request of first: 5 is waiting for resources.
+// 1 allocation on root.parent.child1 should be preempted to free up resources for the ask on root.parent.child2, even though some guaranteed resource types are not used at all.
+func TestTryPreemption_VictimQueue_With_OG_And_UG_ResTypes(t *testing.T) {
+	node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 10, "second": 2})
+	node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 10, "second": 2})
+	iterator := getNodeIteratorFn(node1, node2)
+	rootQ, err := createRootQueue(map[string]string{"first": "20", "storage": "4"})
+	assert.NilError(t, err)
+	parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, map[string]string{"first": "20", "second": "4"}, nil)
+	assert.NilError(t, err)
+	childQ1, err := createManagedQueueGuaranteed(parentQ, "child1", false, nil, map[string]string{"first": "10", "second": "2"})
+	assert.NilError(t, err)
+	childQ2, err := createManagedQueueGuaranteed(parentQ, "child2", false, nil, map[string]string{"first": "10"})
+	assert.NilError(t, err)
+
+	alloc1, alloc2, err := creatApp1(childQ1, node1, node2, map[string]resources.Quantity{"first": 10})
+	assert.NilError(t, err)
+
+	app2, ask3, err := creatApp2(childQ2, map[string]resources.Quantity{"first": 5}, "alloc3")
+	assert.NilError(t, err)
+
+	headRoom := resources.NewResourceFromMap(map[string]resources.Quantity{"second": 4})
+	preemptor := NewPreemptor(app2, headRoom, 30*time.Second, ask3, iterator(), false)
+
+	result, ok := preemptor.TryPreemption()
+	assert.Assert(t, result != nil, "unexpected result")
+	assert.Equal(t, ok, true, "victims found")
+	assert.Check(t, alloc1.IsPreempted() || alloc2.IsPreempted(), "alloc1 or alloc2 preempted")
+}
+
+// TestTryPreemption_VictimQueue_Under_Diff_Parent_With_OG_And_UG_ResTypes Test try preemption with a 3-level queue hierarchy.
+// Guaranteed set on both the victim queue path and the preemptor queue path.
+// The victim queue uses only some of the resource types defined in its guaranteed resources.
+// The request (preemptor) resource types match the victim queue's used resource types, but not all of the resource types defined in its guaranteed resources.
+// Even though some resource types defined in the victim queue's guaranteed resources are unused and irrelevant to the preemptor ask's resource requirements, preemption should
+// be triggered, the victim queue should be considered a candidate, and the victims whose resource types match the ask's resource types should be killed.
+// Setup:
+// Nodes are Node1 & Node2. Each node has enough space to accommodate the new ask.
+// root.parent.parent1.child1: guaranteed set on root.parent.parent1.child1, first: 10, second: 2. 2 allocations (belonging to a single app) are running, each using first:10. Total usage is first:20.
+// root.parent.parent2.child2: guaranteed set on root.parent.parent2.child2, first: 10. A request of first: 5 is waiting for resources.
+// 1 allocation on root.parent.parent1.child1 should be preempted to free up resources for the ask on root.parent.parent2.child2, even though some guaranteed resource types on the victim queue path are not used at all.
+func TestTryPreemption_VictimQueue_Under_Diff_Parent_With_OG_And_UG_ResTypes(t *testing.T) {
+	var tests = []struct {
+		testName               string
+		victimParentGuaranteed map[string]string
+		askParentGuaranteed    map[string]string
+		victimChildGuaranteed  map[string]string
+		askChildGuaranteed     map[string]string
+	}{
+		{"victim queue under parent different from ask queue with some OG res types", map[string]string{"first": "10", "second": "2"}, map[string]string{"first": "10"}, nil, nil},
+		{"victim queue with some OG res types under parent different from ask queue", nil, nil, map[string]string{"first": "10", "second": "2"}, map[string]string{"first": "10"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.testName, func(t *testing.T) {
+			node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 10, "second": 2})
+			node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 10, "second": 2})
+			iterator := getNodeIteratorFn(node1, node2)
+			rootQ, err := createRootQueue(map[string]string{"first": "20", "storage": "4"})
+			assert.NilError(t, err)
+			parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, map[string]string{"first": "20", "second": "4"}, nil)
+			assert.NilError(t, err)
+			parentQ1, err := createManagedQueueGuaranteed(parentQ, "parent1", true, nil, tt.victimParentGuaranteed)
+			assert.NilError(t, err)
+			parentQ2, err := createManagedQueueGuaranteed(parentQ, "parent2", true, nil, tt.askParentGuaranteed)
+			assert.NilError(t, err)
+			childQ1, err := createManagedQueueGuaranteed(parentQ1, "child1", false, nil, tt.victimChildGuaranteed)
+			assert.NilError(t, err)
+			childQ2, err := createManagedQueueGuaranteed(parentQ2, "child2", false, nil, tt.askChildGuaranteed)
+			assert.NilError(t, err)
+
+			alloc1, alloc2, err := creatApp1(childQ1, node1, node2, map[string]resources.Quantity{"first": 10})
+			assert.NilError(t, err)
+
+			app2, ask3, err := creatApp2(childQ2, map[string]resources.Quantity{"first": 5}, "alloc3")
+			assert.NilError(t, err)
+
+			headRoom := resources.NewResourceFromMap(map[string]resources.Quantity{"second": 4})
+			preemptor := NewPreemptor(app2, headRoom, 30*time.Second, ask3, iterator(), false)
+
+			result, ok := preemptor.TryPreemption()
+			assert.Assert(t, result != nil, "unexpected result")
+			assert.Equal(t, ok, true, "victims found")
+			assert.Check(t, alloc1.IsPreempted() || alloc2.IsPreempted(), "alloc1 or alloc2 preempted")
+		})
+	}
+}
+
+// TestTryPreemption_AskQueue_With_OG_And_UG_ResTypes Test try preemption with a 2-level queue hierarchy.
+// Guaranteed set on both the victim queue path and the preemptor queue path.
+// The ask queue has overused only some of the resource types defined in its guaranteed resources and is looking for under-guaranteed (UG) resources.
+// Even though some resource types defined in the ask queue's guaranteed resources are overused and irrelevant to the preemptor ask's resource requirements, preemption should
+// be triggered and pass the preliminary checkPreemptionQueueGuarantees checks: checkPreemptionQueueGuarantees should not fail just because the ask queue has some over-guaranteed (OG) resource types.
+// Setup:
+// Nodes are Node1 & Node2. Each node has enough space to accommodate the new ask.
+// root.parent.child1: guaranteed set on root.parent.child1, first: 10. 2 allocations (belonging to a single app) are running: the first uses first:10, the second uses first:5. Total usage is first:15.
+// root.parent.child2: guaranteed set on root.parent.child2, first: 10, second: 2. 1 allocation is running with usage first:5, second:3 (OG). A request of first: 5, third: 10 is waiting for resources.
+// 1 allocation on root.parent.child1 should be preempted to free up resources for the ask on root.parent.child2, even though the ask queue has overused a few guaranteed resource types.
+func TestTryPreemption_AskQueue_With_OG_And_UG_ResTypes(t *testing.T) {
+	node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 10, "second": 3, "third": 10})
+	node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 10, "second": 3, "third": 10})
+	iterator := getNodeIteratorFn(node1, node2)
+	rootQ, err := createRootQueue(map[string]string{"first": "20", "second": "6"})
+	assert.NilError(t, err)
+	parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, map[string]string{"first": "20", "second": "6"}, nil)
+	assert.NilError(t, err)
+	childQ1, err := createManagedQueueGuaranteed(parentQ, "child1", false, nil, map[string]string{"first": "10"})
+	assert.NilError(t, err)
+	childQ2, err := createManagedQueueGuaranteed(parentQ, "child2", false, nil, map[string]string{"first": "10", "second": "2"})
+	assert.NilError(t, err)
+
+	alloc1, alloc2, err := creatApp1WithTwoDifferentAllocations(childQ1, node1, node2, map[string]resources.Quantity{"first": 10}, map[string]resources.Quantity{"first": 5})
+	assert.NilError(t, err)
+
+	alloc3Res := resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5, "second": 3})
+	app2, ask3, err := creatApp2(childQ2, map[string]resources.Quantity{"first": 5, "second": 3}, "alloc3")
+	assert.NilError(t, err)
+
+	alloc3 := newAllocationWithKey("alloc3", appID2, nodeID2, alloc3Res)
+	alloc3.createTime = ask3.createTime
+	app2.AddAllocation(alloc3)
+	if !node2.TryAddAllocation(alloc3) {
+		t.Fatal("node alloc3 failed")
+	}
+	if err = childQ2.TryIncAllocatedResource(ask3.GetAllocatedResource()); err != nil {
+		t.Fatal("inc queue resource failed")
+	}
+	app3 := newApplication(appID3, "default", "root.parent.child2")
+	app3.SetQueue(childQ2)
+	childQ2.applications[appID3] = app3
+
+	ask4 := newAllocationAsk("alloc4", appID3, resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5, "third": 10}))
+	err = app3.AddAllocationAsk(ask4)
+	assert.NilError(t, err)
+
+	headRoom := resources.NewResourceFromMap(map[string]resources.Quantity{"second": 6})
+	preemptor := NewPreemptor(app3, headRoom, 30*time.Second, ask4, iterator(), false)
+
+	result, ok := preemptor.TryPreemption()
+	assert.Assert(t, result != nil, "unexpected result")
+	assert.Equal(t, ok, true, "victims found")
+	assert.Check(t, alloc1.IsPreempted() || alloc2.IsPreempted(), "alloc1 or alloc2 preempted")
+}
+
+// TestTryPreemption_AskQueue_Under_DiffParent_With_OG_And_UG_ResTypes Test try preemption with a 3-level queue hierarchy.
+// Guaranteed set on both the victim queue path and the preemptor queue path.
+// The ask queue has overused only some of the resource types defined in its guaranteed resources and is looking for under-guaranteed (UG) resources.
+// Even though some resource types defined in the ask queue's guaranteed resources are overused and irrelevant to the preemptor ask's resource requirements, preemption should
+// be triggered and pass the preliminary checkPreemptionQueueGuarantees checks: checkPreemptionQueueGuarantees should not fail just because the ask queue has some over-guaranteed (OG) resource types.
+// Setup:
+// Nodes are Node1 & Node2. Each node has enough space to accommodate the new ask.
+// root.parent.parent1.child1: guaranteed set on root.parent.parent1.child1, first: 10. 2 allocations (belonging to a single app) are running: the first uses first:10, the second uses first:5. Total usage is first:15.
+// root.parent.parent2.child2: guaranteed set on root.parent.parent2.child2, first: 10, second: 2. 1 allocation is running with usage first:5, second:3 (OG). A request of first: 5, third: 10 is waiting for resources.
+// 1 allocation on root.parent.parent1.child1 should be preempted to free up resources for the ask on root.parent.parent2.child2, even though the ask queue has overused a few guaranteed resource types.
+func TestTryPreemption_AskQueue_Under_DiffParent_With_OG_And_UG_ResTypes(t *testing.T) {
+	var tests = []struct {
+		testName               string
+		victimParentGuaranteed map[string]string
+		askParentGuaranteed    map[string]string
+		victimChildGuaranteed  map[string]string
+		askChildGuaranteed     map[string]string
+	}{
+		{"ask queue under parent different from victim queue with some OG res types", map[string]string{"first": "10"}, map[string]string{"first": "10", "second": "2"}, nil, nil},
+		{"ask queue with some OG res types under parent different from victim queue", nil, nil, map[string]string{"first": "10"}, map[string]string{"first": "10", "second": "2"}},
+	}
+	for _, tt := range tests {
+		t.Run(tt.testName, func(t *testing.T) {
+			node1 := newNode(nodeID1, map[string]resources.Quantity{"first": 10, "second": 3, "third": 10})
+			node2 := newNode(nodeID2, map[string]resources.Quantity{"first": 10, "second": 3, "third": 10})
+			iterator := getNodeIteratorFn(node1, node2)
+			rootQ, err := createRootQueue(map[string]string{"first": "20", "second": "6"})
+			assert.NilError(t, err)
+			parentQ, err := createManagedQueueGuaranteed(rootQ, "parent", true, map[string]string{"first": "20", "second": "6"}, nil)
+			assert.NilError(t, err)
+			parentQ1, err := createManagedQueueGuaranteed(parentQ, "parent1", true, nil, tt.victimParentGuaranteed)
+			assert.NilError(t, err)
+			parentQ2, err := createManagedQueueGuaranteed(parentQ, "parent2", true, nil, tt.askParentGuaranteed)
+			assert.NilError(t, err)
+			childQ1, err := createManagedQueueGuaranteed(parentQ1, "child1", false, nil, tt.victimChildGuaranteed)
+			assert.NilError(t, err)
+			childQ2, err := createManagedQueueGuaranteed(parentQ2, "child2", false, nil, tt.askChildGuaranteed)
+			assert.NilError(t, err)
+
+			alloc1, alloc2, err := creatApp1WithTwoDifferentAllocations(childQ1, node1, node2, map[string]resources.Quantity{"first": 10}, map[string]resources.Quantity{"first": 5})
+			assert.NilError(t, err)
+
+			alloc3Res := resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5, "second": 3})
+			app2, ask3, err := creatApp2(childQ2, map[string]resources.Quantity{"first": 5, "second": 3}, "alloc3")
+			assert.NilError(t, err)
+
+			alloc3 := newAllocationWithKey("alloc3", appID2, nodeID2, alloc3Res)
+			alloc3.createTime = ask3.createTime
+			app2.AddAllocation(alloc3)
+			if !node2.TryAddAllocation(alloc3) {
+				t.Fatal("node alloc3 failed")
+			}
+			if err = childQ2.TryIncAllocatedResource(ask3.GetAllocatedResource()); err != nil {
+				t.Fatal("inc queue resource failed")
+			}
+			app3 := newApplication(appID3, "default", "root.parent.parent2.child2")
+			app3.SetQueue(childQ2)
+			childQ2.applications[appID3] = app3
+
+			ask4 := newAllocationAsk("alloc4", appID3, resources.NewResourceFromMap(map[string]resources.Quantity{"first": 5, "third": 10}))
+			err = app3.AddAllocationAsk(ask4)
+			assert.NilError(t, err)
+
+			headRoom := resources.NewResourceFromMap(map[string]resources.Quantity{"second": 6})
+			preemptor := NewPreemptor(app3, headRoom, 30*time.Second, ask4, iterator(), false)
+
+			result, ok := preemptor.TryPreemption()
+			assert.Assert(t, result != nil, "unexpected result")
+			assert.Equal(t, ok, true, "victims found")
+			assert.Check(t, alloc1.IsPreempted() || alloc2.IsPreempted(), "alloc1 or alloc2 preempted")
+		})
+	}
+}
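
Reviewer note (not part of the patch): below is a minimal, self-contained Go sketch of the "net remaining guaranteed" idea the patch introduces. A plain map[string]int64 stands in for *resources.Resource, extractLatestIfModified mirrors the new resources.ExtractLatestIfModified, and allNonPositive approximates the resources.StrictlyGreaterThanOrEquals(resources.Zero, netRemaining) check; the queue values are invented for illustration only.

package main

import "fmt"

// extractLatestIfModified mirrors the patch's resources.ExtractLatestIfModified using plain maps:
// it returns the new value for every resource type present in both maps whose value changed,
// or nil when either input is nil or nothing was modified.
func extractLatestIfModified(old, new map[string]int64) map[string]int64 {
	if old == nil || new == nil {
		return nil
	}
	latest := map[string]int64{}
	for k, v := range old {
		if newValue, exists := new[k]; exists && v != newValue {
			latest[k] = newValue
		}
	}
	if len(latest) == 0 {
		return nil
	}
	return latest
}

// allNonPositive stands in for StrictlyGreaterThanOrEquals(Zero, netRemaining): true when every
// type the victim touched is still at or over its guarantee after the victim is removed.
func allNonPositive(r map[string]int64) bool {
	for _, v := range r {
		if v > 0 {
			return false
		}
	}
	return true
}

func main() {
	// A victim queue guarantees {first: 10, second: 2} but only "first" is in use (usage first: 20),
	// so remaining guaranteed is {first: -10, second: 2}. Removing a first:10 victim yields:
	oldRemaining := map[string]int64{"first": -10, "second": 2}
	remaining := map[string]int64{"first": 0, "second": 2}

	// Only the type the victim actually modified survives; the untouched "second" (+2) is dropped.
	netRemaining := extractLatestIfModified(oldRemaining, remaining)
	fmt.Println(netRemaining)                 // map[first:0]
	fmt.Println(allNonPositive(netRemaining)) // true: this victim frees genuinely useful resources
}

The point of the filtering: pre-patch, the check looked at the whole oldRemaining, so the +2 on the unused "second" type masked the -10 on "first" and the victim was skipped; with the net remaining value, a guaranteed type the victim never touches can no longer veto an otherwise valid preemption.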