Skip to content

Commit db597b1

Browse files
authored
Merge pull request #7966 from pmendelski/htnap-events-for-tpu
Emit event on successful async scale-up
2 parents 7b69964 + 0c52255 commit db597b1

File tree

2 files changed: 68 additions and 51 deletions

cluster-autoscaler/core/scaleup/orchestrator/async_initializer.go

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,9 +155,17 @@ func (s *AsyncNodeGroupInitializer) InitializeNodeGroup(result nodegroups.AsyncN
155155
return
156156
}
157157
klog.Infof("Initial scale-up succeeded. Scale ups: %v", scaleUpInfos)
158+
s.emitScaleUpStatus(&status.ScaleUpStatus{
159+
Result: status.ScaleUpSuccessful,
160+
ScaleUpInfos: scaleUpInfos,
161+
CreateNodeGroupResults: []nodegroups.CreateNodeGroupResult{result.CreationResult},
162+
PodsTriggeredScaleUp: s.triggeringPods,
163+
}, nil)
158164
}
159165

160166
func (s *AsyncNodeGroupInitializer) emitScaleUpStatus(scaleUpStatus *status.ScaleUpStatus, err errors.AutoscalerError) {
161-
status.UpdateScaleUpError(scaleUpStatus, err)
167+
if err != nil {
168+
status.UpdateScaleUpError(scaleUpStatus, err)
169+
}
162170
s.scaleUpStatusProcessor.Process(s.context, scaleUpStatus)
163171
}

cluster-autoscaler/core/scaleup/orchestrator/async_initializer_test.go

Lines changed: 59 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ import (
3030
"k8s.io/autoscaler/cluster-autoscaler/expander"
3131
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups"
3232
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups"
33+
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
3334
"k8s.io/autoscaler/cluster-autoscaler/processors/status"
3435
processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test"
3536
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
@@ -41,72 +42,80 @@ import (
4142
)
4243

4344
func TestNodePoolAsyncInitialization(t *testing.T) {
45+
scaleUpSize := 3
46+
failingNodeGroupName := "failing-ng"
47+
provider := testprovider.NewTestCloudProvider(
48+
func(nodeGroup string, increase int) error {
49+
if nodeGroup == failingNodeGroupName {
50+
return fmt.Errorf("Simulated error")
51+
}
52+
return nil
53+
}, nil)
54+
pod := BuildTestPod("p1", 2, 1000)
55+
failingNodeGroup := provider.BuildNodeGroup(failingNodeGroupName, 0, 100, 0, false, true, "T1", nil)
56+
successfulNodeGroup := provider.BuildNodeGroup("async-ng", 0, 100, 0, false, true, "T1", nil)
57+
failedScaleUpErr := errors.ToAutoscalerError(errors.CloudProviderError, fmt.Errorf("Simulated error")).AddPrefix("failed to increase node group size: ")
4458
testCases := []struct {
45-
name string
46-
failingScaleUps map[string]bool
47-
expectedScaleUps map[string]int
59+
name string
60+
nodeGroup *testprovider.TestNodeGroup
61+
wantStatus status.ScaleUpStatus
4862
}{
4963
{
50-
name: "scale up upcoming node group",
51-
expectedScaleUps: map[string]int{"async-ng": 3},
64+
name: "scale up upcoming node group",
65+
nodeGroup: successfulNodeGroup,
66+
wantStatus: status.ScaleUpStatus{
67+
Result: status.ScaleUpSuccessful,
68+
ScaleUpInfos: []nodegroupset.ScaleUpInfo{
69+
{
70+
Group: successfulNodeGroup,
71+
CurrentSize: 0,
72+
NewSize: scaleUpSize,
73+
MaxSize: successfulNodeGroup.MaxSize(),
74+
},
75+
},
76+
CreateNodeGroupResults: []nodegroups.CreateNodeGroupResult{
77+
{MainCreatedNodeGroup: successfulNodeGroup},
78+
},
79+
PodsTriggeredScaleUp: []*apiv1.Pod{pod},
80+
},
5281
},
5382
{
54-
name: "failing initial scale up",
55-
failingScaleUps: map[string]bool{"async-ng": true},
83+
name: "failing initial scale up",
84+
nodeGroup: failingNodeGroup,
85+
wantStatus: status.ScaleUpStatus{
86+
Result: status.ScaleUpError,
87+
ScaleUpError: &failedScaleUpErr,
88+
CreateNodeGroupResults: []nodegroups.CreateNodeGroupResult{
89+
{MainCreatedNodeGroup: failingNodeGroup},
90+
},
91+
FailedResizeNodeGroups: []cloudprovider.NodeGroup{failingNodeGroup},
92+
PodsTriggeredScaleUp: []*apiv1.Pod{pod},
93+
},
5694
},
5795
}
96+
listers := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, nil)
97+
upcomingNodeGroup := provider.BuildNodeGroup("upcoming-ng", 0, 100, 0, false, true, "T1", nil)
98+
options := config.AutoscalingOptions{AsyncNodeGroupsEnabled: true}
99+
context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
100+
assert.NoError(t, err)
101+
option := expander.Option{NodeGroup: upcomingNodeGroup, Pods: []*apiv1.Pod{pod}}
102+
processors := processorstest.NewTestProcessors(&context)
103+
processors.AsyncNodeGroupStateChecker = &asyncnodegroups.MockAsyncNodeGroupStateChecker{IsUpcomingNodeGroup: map[string]bool{upcomingNodeGroup.Id(): true}}
104+
nodeInfo := framework.NewTestNodeInfo(BuildTestNode("t1", 100, 0))
105+
executor := newScaleUpExecutor(&context, processors.ScaleStateNotifier, processors.AsyncNodeGroupStateChecker)
58106
for _, tc := range testCases {
59107
t.Run(tc.name, func(t *testing.T) {
60-
scaledUpGroups := make(map[string]int)
61-
provider := testprovider.NewTestCloudProvider(
62-
func(nodeGroup string, increase int) error {
63-
if tc.failingScaleUps[nodeGroup] {
64-
return fmt.Errorf("Simulated error")
65-
}
66-
scaledUpGroups[nodeGroup] += increase
67-
return nil
68-
}, nil)
69-
options := config.AutoscalingOptions{
70-
AsyncNodeGroupsEnabled: true,
71-
}
72-
listers := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, nil)
73-
context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
74-
assert.NoError(t, err)
75-
p1 := BuildTestPod("p1", 2, 1000)
76-
upcomingNodeGroup := provider.BuildNodeGroup("upcoming-ng", 0, 100, 0, false, true, "T1", nil)
77-
createdNodeGroup := provider.BuildNodeGroup("async-ng", 0, 100, 0, false, true, "T1", nil)
78-
option := expander.Option{
79-
NodeGroup: upcomingNodeGroup,
80-
Pods: []*apiv1.Pod{p1},
81-
}
82-
processors := processorstest.NewTestProcessors(&context)
83-
processors.AsyncNodeGroupStateChecker = &asyncnodegroups.MockAsyncNodeGroupStateChecker{IsUpcomingNodeGroup: map[string]bool{upcomingNodeGroup.Id(): true}}
84-
nodeInfo := framework.NewTestNodeInfo(BuildTestNode("t1", 100, 0))
85-
executor := newScaleUpExecutor(&context, processors.ScaleStateNotifier, processors.AsyncNodeGroupStateChecker)
86108
scaleUpStatusProcessor := &fakeScaleUpStatusProcessor{}
87109
initializer := NewAsyncNodeGroupInitializer(&option, nodeInfo, executor, taints.TaintConfig{}, nil, scaleUpStatusProcessor, &context, false)
88-
initializer.SetTargetSize(upcomingNodeGroup.Id(), 3)
110+
initializer.SetTargetSize(upcomingNodeGroup.Id(), int64(scaleUpSize))
89111
asyncResult := nodegroups.AsyncNodeGroupCreationResult{
90-
CreationResult: nodegroups.CreateNodeGroupResult{MainCreatedNodeGroup: createdNodeGroup},
112+
CreationResult: nodegroups.CreateNodeGroupResult{MainCreatedNodeGroup: tc.nodeGroup},
91113
CreatedToUpcomingMapping: map[string]string{
92-
createdNodeGroup.Id(): upcomingNodeGroup.Id(),
114+
tc.nodeGroup.Id(): upcomingNodeGroup.Id(),
93115
},
94116
}
95117
initializer.InitializeNodeGroup(asyncResult)
96-
assert.Equal(t, len(scaledUpGroups), len(tc.expectedScaleUps))
97-
for groupName, increase := range tc.expectedScaleUps {
98-
assert.Equal(t, increase, scaledUpGroups[groupName])
99-
}
100-
if len(tc.failingScaleUps) > 0 {
101-
expectedErr := errors.ToAutoscalerError(errors.CloudProviderError, fmt.Errorf("Simulated error")).AddPrefix("failed to increase node group size: ")
102-
assert.Equal(t, scaleUpStatusProcessor.lastStatus, &status.ScaleUpStatus{
103-
Result: status.ScaleUpError,
104-
ScaleUpError: &expectedErr,
105-
CreateNodeGroupResults: []nodegroups.CreateNodeGroupResult{asyncResult.CreationResult},
106-
FailedResizeNodeGroups: []cloudprovider.NodeGroup{createdNodeGroup},
107-
PodsTriggeredScaleUp: option.Pods,
108-
})
109-
}
118+
assert.Equal(t, *scaleUpStatusProcessor.lastStatus, tc.wantStatus)
110119
})
111120
}
112121
}

0 commit comments

Comments
 (0)