perf: respect multinode consolidation timeout in all cases #2025

Merged
Changes from 14 commits
Commits
37 commits
b4cfac1
perf: respect multinode consolidation timeout in all cases
rschalo Feb 21, 2025
40dde20
put comments back
rschalo Feb 21, 2025
37323a4
inline clock.Now()
rschalo Feb 21, 2025
8753963
expand context deadline handling
rschalo Feb 24, 2025
d6ab2b6
fix comment
rschalo Feb 24, 2025
2c432e3
short circuit ctx canceled in solve
rschalo Feb 24, 2025
56a681d
Merge branch 'main' into multinode-consolidation-timeout-fix
rschalo Feb 24, 2025
b92e99b
fix spacing
rschalo Feb 24, 2025
767fbed
remove topology short circuit
rschalo Feb 25, 2025
43dfd0a
nolint gocyclo silence
rschalo Feb 25, 2025
f021362
add test for context deadline timeout
rschalo Feb 25, 2025
12d1adc
remove select block
rschalo Feb 25, 2025
9b552a8
fix comment
rschalo Feb 25, 2025
86ecbc3
remove redundant check
rschalo Feb 25, 2025
5521002
pr response for context use
rschalo Mar 4, 2025
e2dccfe
Merge branch 'main' into multinode-consolidation-timeout-fix
rschalo Mar 7, 2025
554a224
handle timeout in provisioning loop
rschalo Mar 7, 2025
c494ca5
mark all pods as not having been scheduled due to context errors
rschalo Mar 7, 2025
12fa309
Merge branch 'main' into multinode-consolidation-timeout-fix
rschalo Mar 7, 2025
f631c7f
add additional context handling in provisioner
rschalo Mar 7, 2025
0320177
reduce scope
rschalo Mar 7, 2025
fb32f53
remove unused function
rschalo Mar 7, 2025
3729641
return error directly
rschalo Mar 9, 2025
ff9f119
use timeout instead of context cancels
rschalo Mar 13, 2025
e77389d
Revert "use timeout instead of context cancels"
rschalo Mar 14, 2025
f8be51a
remove pod error and break out of loop if ctx err
rschalo Mar 14, 2025
1f6f9bf
remove binary from history
rschalo Mar 14, 2025
b153118
remove ctx check in nodePools
rschalo Mar 14, 2025
c56e10f
another pr round
rschalo Mar 18, 2025
6f0187c
reduce diff
rschalo Mar 18, 2025
b34e1b2
return error from solve
rschalo Mar 18, 2025
eb5849c
fix presubmit
rschalo Mar 18, 2025
05d24b7
fix test assertion
rschalo Mar 19, 2025
8120724
add comment for provisioning loop
rschalo Mar 19, 2025
209f725
updates for pr
rschalo Mar 19, 2025
f3d18de
more updates
rschalo Mar 19, 2025
3c7a50f
log change
rschalo Mar 19, 2025
22 changes: 15 additions & 7 deletions pkg/controllers/disruption/multinodeconsolidation.go
@@ -18,6 +18,7 @@ package disruption

import (
"context"
"errors"
"fmt"
"math"
"time"
@@ -107,6 +108,7 @@ func (m *MultiNodeConsolidation) ComputeCommand(ctx context.Context, disruptionB

// firstNConsolidationOption looks at the first N NodeClaims to determine if they can all be consolidated at once. The
// NodeClaims are sorted by increasing disruption order which correlates to likelihood of being able to consolidate the node
// nolint:gocyclo
func (m *MultiNodeConsolidation) firstNConsolidationOption(ctx context.Context, candidates []*Candidate, max int) (Command, scheduling.Results, error) {
// we always operate on at least two NodeClaims at once, for single NodeClaims standard consolidation will find all solutions
if len(candidates) < 2 {
@@ -120,22 +122,27 @@ func (m *MultiNodeConsolidation) firstNConsolidationOption(ctx context.Context,
lastSavedCommand := Command{}
lastSavedResults := scheduling.Results{}
// Set a timeout
timeout := m.clock.Now().Add(MultiNodeConsolidationTimeoutDuration)
// binary search to find the maximum number of NodeClaims we can terminate
timeoutCtx, cancel := context.WithDeadline(ctx, m.clock.Now().Add(MultiNodeConsolidationTimeoutDuration))
defer cancel()
for min <= max {
if m.clock.Now().After(timeout) {
// Check whether the consolidation timeout has elapsed via the context deadline
if timeoutCtx.Err() != nil {
ConsolidationTimeoutsTotal.Inc(map[string]string{consolidationTypeLabel: m.ConsolidationType()})
if lastSavedCommand.candidates == nil {
log.FromContext(ctx).V(1).Info(fmt.Sprintf("failed to find a multi-node consolidation after timeout, last considered batch had %d", (min+max)/2))
} else {
log.FromContext(ctx).V(1).WithValues(lastSavedCommand.LogValues()...).Info("stopping multi-node consolidation after timeout, returning last valid command")
return Command{}, scheduling.Results{}, fmt.Errorf("multi-node consolidation timed out while considering %d nodes without finding a valid command", (min+max)/2)
}
log.FromContext(ctx).V(1).WithValues(lastSavedCommand.LogValues()...).Info("stopping multi-node consolidation after timeout, returning last valid command")
return lastSavedCommand, lastSavedResults, nil
}
mid := (min + max) / 2
candidatesToConsolidate := candidates[0 : mid+1]

cmd, results, err := m.computeConsolidation(ctx, candidatesToConsolidate...)
// Pass the timeout context to ensure sub-operations can be canceled
cmd, results, err := m.computeConsolidation(timeoutCtx, candidatesToConsolidate...)
// context deadline exceeded will return to the top of the loop and either return nothing or the last saved command
if errors.Is(err, context.DeadlineExceeded) {
continue
}
if err != nil {
return Command{}, scheduling.Results{}, err
}
@@ -149,6 +156,7 @@ func (m *MultiNodeConsolidation) firstNConsolidationOption(ctx context.Context,
}

// replacementHasValidInstanceTypes will be false if the replacement action has no valid instance types remaining after filtering.

if replacementHasValidInstanceTypes || cmd.Decision() == DeleteDecision {
// We can consolidate NodeClaims [0,mid]
lastSavedCommand = cmd
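The hunk above replaces the clock-based timeout with a context deadline so that both the binary-search loop and computeConsolidation stop at the same cutoff. A minimal, self-contained sketch of that pattern follows; canConsolidate, findLargestBatch, and the simulated batch sizes are illustrative stand-ins, not Karpenter APIs.

package main

import (
    "context"
    "errors"
    "fmt"
    "time"
)

// canConsolidate stands in for the expensive per-batch computation
// (computeConsolidation in the PR); it honors the context deadline.
func canConsolidate(ctx context.Context, batchSize int) (bool, error) {
    select {
    case <-ctx.Done():
        return false, ctx.Err()
    case <-time.After(50 * time.Millisecond): // simulated work per attempt
        return batchSize <= 60, nil // pretend batches of up to 60 nodes fit
    }
}

// findLargestBatch binary-searches for the largest consolidatable batch,
// returning the last batch size that worked once the deadline is reached.
func findLargestBatch(ctx context.Context, max int, timeout time.Duration) (int, error) {
    timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
    defer cancel()

    lastSaved := 0
    min := 1
    for min <= max {
        // Stop as soon as the deadline passes, keeping the work done so far.
        if timeoutCtx.Err() != nil {
            if lastSaved == 0 {
                return 0, fmt.Errorf("timed out while considering %d nodes without finding a valid batch", (min+max)/2)
            }
            return lastSaved, nil
        }
        mid := (min + max) / 2
        ok, err := canConsolidate(timeoutCtx, mid)
        // A deadline error from the sub-operation loops back to the check above
        // instead of being treated as a hard failure.
        if errors.Is(err, context.DeadlineExceeded) {
            continue
        }
        if err != nil {
            return 0, err
        }
        if ok {
            lastSaved = mid
            min = mid + 1 // the batch fit; try a larger one
        } else {
            max = mid - 1 // the batch was too large; try a smaller one
        }
    }
    return lastSaved, nil
}

func main() {
    n, err := findLargestBatch(context.Background(), 100, 200*time.Millisecond)
    fmt.Println(n, err)
}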
56 changes: 56 additions & 0 deletions pkg/controllers/disruption/suite_test.go
@@ -2022,6 +2022,62 @@ var _ = Describe("Metrics", func() {
"consolidation_type": "multi",
})
})
It("should stop multi-node consolidation after context deadline is reached", func() {
nodeClaims, nodes = test.NodeClaimsAndNodes(3, v1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1.NodePoolLabelKey: nodePool.Name,
corev1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
v1.CapacityTypeLabelKey: mostExpensiveOffering.Requirements.Get(v1.CapacityTypeLabelKey).Any(),
corev1.LabelTopologyZone: mostExpensiveOffering.Requirements.Get(corev1.LabelTopologyZone).Any(),
},
},
Status: v1.NodeClaimStatus{
Allocatable: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("32"),
corev1.ResourcePods: resource.MustParse("100"),
},
},
})
for _, nc := range nodeClaims {
nc.StatusConditions().SetTrue(v1.ConditionTypeConsolidatable)
}
// create our RS so we can link a pod to it
rs := test.ReplicaSet()
ExpectApplied(ctx, env.Client, rs)
pods := test.Pods(4, test.PodOptions{
ObjectMeta: metav1.ObjectMeta{Labels: labels,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "apps/v1",
Kind: "ReplicaSet",
Name: rs.Name,
UID: rs.UID,
Controller: lo.ToPtr(true),
BlockOwnerDeletion: lo.ToPtr(true),
},
},
},
})

ExpectApplied(ctx, env.Client, rs, pods[0], pods[1], pods[2], pods[3], nodeClaims[0], nodes[0], nodeClaims[1], nodes[1], nodeClaims[2], nodes[2], nodePool)

// bind pods to nodes
ExpectManualBinding(ctx, env.Client, pods[0], nodes[0])
ExpectManualBinding(ctx, env.Client, pods[1], nodes[1])
ExpectManualBinding(ctx, env.Client, pods[2], nodes[2])
ExpectManualBinding(ctx, env.Client, pods[3], nodes[2])

// inform cluster state about nodes and nodeclaims
ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{nodes[0], nodes[1], nodes[2]}, []*v1.NodeClaim{nodeClaims[0], nodeClaims[1], nodeClaims[2]})
// create deadline in the past
deadlineCtx, cancel := context.WithDeadline(ctx, fakeClock.Now().Add(-disruption.MultiNodeConsolidationTimeoutDuration))
defer cancel()

ExpectSingletonReconciled(deadlineCtx, disruptionController)
// expect that due to timeout zero nodes were tainted in consolidation
ExpectTaintedNodeCount(ctx, env.Client, 0)
})
})

func leastExpensiveInstanceWithZone(zone string) *cloudprovider.InstanceType {
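The test above forces the timeout branch by handing the reconcile a context whose deadline is already in the past. A standalone illustration of that trick, using plain time.Now() in place of the suite's fakeClock:

package main

import (
    "context"
    "fmt"
    "time"
)

func main() {
    // A deadline in the past means ctx.Err() is non-nil immediately, so code
    // that checks the context hits its timeout path without any real waiting.
    deadlineCtx, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Minute))
    defer cancel()

    fmt.Println(deadlineCtx.Err()) // context deadline exceeded
}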
3 changes: 3 additions & 0 deletions pkg/controllers/provisioning/provisioner.go
@@ -236,6 +236,9 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat

instanceTypes := map[string][]*cloudprovider.InstanceType{}
for _, np := range nodePools {
if ctx.Err() != nil {
return nil, fmt.Errorf("context error while getting instance types, %w", ctx.Err())
}
// Get instance type options
its, err := p.cloudProvider.GetInstanceTypes(ctx, np)
if err != nil {
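The provisioner hunk adds an early exit to a loop that performs one cloud-provider call per NodePool, so a canceled or expired context stops the remaining calls promptly. A minimal sketch of the same guard; fetchAll and the fake fetcher are illustrative names, not Karpenter's API:

package main

import (
    "context"
    "fmt"
    "time"
)

// fetchAll calls fetch once per name, stopping early if the caller's context
// is canceled or its deadline passes between iterations.
func fetchAll(ctx context.Context, names []string, fetch func(context.Context, string) (string, error)) (map[string]string, error) {
    out := map[string]string{}
    for _, name := range names {
        // Each fetch may be expensive; checking ctx.Err() up front keeps a long
        // list from running far past the caller's deadline or cancellation.
        if ctx.Err() != nil {
            return nil, fmt.Errorf("context error while fetching instance types, %w", ctx.Err())
        }
        v, err := fetch(ctx, name)
        if err != nil {
            return nil, err
        }
        out[name] = v
    }
    return out, nil
}

func main() {
    ctx, cancel := context.WithTimeout(context.Background(), 25*time.Millisecond)
    defer cancel()
    _, err := fetchAll(ctx, []string{"a", "b", "c"}, func(ctx context.Context, name string) (string, error) {
        time.Sleep(20 * time.Millisecond) // simulated slow provider call
        return "types-for-" + name, nil
    })
    fmt.Println(err) // the loop stops once the 25ms deadline passes
}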
10 changes: 10 additions & 0 deletions pkg/controllers/provisioning/scheduling/scheduler.go
@@ -244,6 +244,12 @@ func (s *Scheduler) Solve(ctx context.Context, pods []*corev1.Pod) Results {
break
}

// when context has been canceled or deadline exceeded, stop attempting to schedule pods and mark current pod as unschedulable
if ctx.Err() != nil {
errors[pod] = ctx.Err()
break
}

// Schedule to existing nodes or create a new node
if errors[pod] = s.add(ctx, pod); errors[pod] == nil {
delete(errors, pod)
@@ -289,6 +295,10 @@ func (s *Scheduler) updateCachedPodData(p *corev1.Pod) {
}

func (s *Scheduler) add(ctx context.Context, pod *corev1.Pod) error {
// Check if context has been canceled or deadline exceeded
if ctx.Err() != nil {
return ctx.Err()
}
// first try to schedule against an in-flight real node
for _, node := range s.existingNodes {
if err := node.Add(ctx, s.kubeClient, pod, s.cachedPodData[pod.UID]); err == nil {
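The scheduler changes record the context error against the pod being processed and stop the loop, so a deadline-driven stop can be told apart from a pod that genuinely cannot fit. A small sketch of that per-item pattern; schedule and place are illustrative names, not the Scheduler's real methods:

package main

import (
    "context"
    "errors"
    "fmt"
    "time"
)

// schedule records a per-item error and stops early once the context is done,
// so callers can distinguish "ran out of time" from "genuinely unschedulable".
func schedule(ctx context.Context, pods []string, place func(context.Context, string) error) map[string]error {
    errs := map[string]error{}
    for _, pod := range pods {
        // Once the context is canceled or past its deadline, stop trying:
        // attribute the context error to the current pod and break out.
        if ctx.Err() != nil {
            errs[pod] = ctx.Err()
            break
        }
        if err := place(ctx, pod); err != nil {
            errs[pod] = err
        }
    }
    return errs
}

func main() {
    ctx, cancel := context.WithTimeout(context.Background(), 15*time.Millisecond)
    defer cancel()
    errs := schedule(ctx, []string{"pod-a", "pod-b", "pod-c"}, func(ctx context.Context, pod string) error {
        time.Sleep(10 * time.Millisecond) // simulated scheduling work
        return nil
    })
    for pod, err := range errs {
        // A deadline error means the pod was never attempted, not that it cannot fit.
        fmt.Println(pod, errors.Is(err, context.DeadlineExceeded))
    }
}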