Skip to content

Commit e439dfb

Browse files
committed
fix(exoscale): enhance UX around SKS nodepools
In the 1.31.0 and lower version SKS nodepools where discovered through exoscale instance pools. Whereas it works as expected, end user experience is not very convenient, especially when end users use priority based expander. On our portal SKS nodepools are shown with their ID and the instancepool is hidden in the SKS nodepool page itself. People trying to use priority based expander put SKS nodepool ID instead of our generic compute API InstancePool ID. For convenience, starting with this commit we are using SKS nodepool ID and we add retro compatibility for already cluster autoscaler managed SKS clusters. This will reduce a bit our support on it, and make priority based expander UX more convenient.
1 parent e90519a commit e439dfb

6 files changed

+79
-12
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# VSCode project files
1717
**/.vscode
1818
*.code-workspace
19+
__debug_bin*
1920

2021
# Emacs save files
2122
*~

cluster-autoscaler/cloudprovider/exoscale/exoscale_cloud_provider_test.go

+36-2
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,21 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroupForNode_Ins
197197
nil,
198198
)
199199

200+
ts.p.manager.client.(*exoscaleClientMock).
201+
On("ListSKSClusters", ts.p.manager.ctx, ts.p.manager.zone).
202+
Return(
203+
[]*egoscale.SKSCluster{{
204+
ID: &testSKSClusterID,
205+
Name: &testSKSClusterName,
206+
Nodepools: []*egoscale.SKSNodepool{{
207+
ID: &testSKSNodepoolID,
208+
InstancePoolID: &testInstancePoolID,
209+
Name: &testSKSNodepoolName,
210+
}},
211+
}},
212+
nil,
213+
)
214+
200215
nodeGroup, err := ts.p.NodeGroupForNode(&apiv1.Node{
201216
Spec: apiv1.NodeSpec{
202217
ProviderID: toProviderID(testInstanceID),
@@ -280,7 +295,7 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroupForNode_SKS
280295
})
281296
ts.Require().NoError(err)
282297
ts.Require().NotNil(nodeGroup)
283-
ts.Require().Equal(testInstancePoolID, nodeGroup.Id())
298+
ts.Require().Equal(testSKSNodepoolID, nodeGroup.Id())
284299
ts.Require().IsType(&sksNodepoolNodeGroup{}, nodeGroup)
285300
}
286301

@@ -359,6 +374,21 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroups() {
359374
nil,
360375
)
361376

377+
ts.p.manager.client.(*exoscaleClientMock).
378+
On("ListSKSClusters", ts.p.manager.ctx, ts.p.manager.zone).
379+
Return(
380+
[]*egoscale.SKSCluster{{
381+
ID: &testSKSClusterID,
382+
Name: &testSKSClusterName,
383+
Nodepools: []*egoscale.SKSNodepool{{
384+
ID: &testSKSNodepoolID,
385+
InstancePoolID: &sksNodepoolInstancePoolID,
386+
Name: &testSKSNodepoolName,
387+
}},
388+
}},
389+
nil,
390+
)
391+
362392
instancePoolNodeGroup, err := ts.p.NodeGroupForNode(&apiv1.Node{
363393
Spec: apiv1.NodeSpec{
364394
ProviderID: toProviderID(instancePoolInstanceID),
@@ -432,7 +462,11 @@ func (ts *cloudProviderTestSuite) TestExoscaleCloudProvider_NodeGroups() {
432462

433463
// ---------------------------------------------------------------
434464

435-
ts.Require().Len(ts.p.NodeGroups(), 2)
465+
nodeGroups := ts.p.NodeGroups()
466+
ts.Require().Len(nodeGroups, 2)
467+
if len(nodeGroups) >= 2 {
468+
ts.Require().Equal(nodeGroups[1].Id(), testSKSNodepoolID)
469+
}
436470
}
437471

438472
func TestSuiteExoscaleCloudProvider(t *testing.T) {

cluster-autoscaler/cloudprovider/exoscale/exoscale_manager.go

+36-4
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,46 @@ func newManager(discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) (*Manager
9696
// based on the `--scan-interval`. By default it's 10 seconds.
9797
func (m *Manager) Refresh() error {
9898
var nodeGroups []cloudprovider.NodeGroup
99+
100+
// load clusters, it's required for SKS node groups check
101+
sksClusters, err := m.client.ListSKSClusters(m.ctx, m.zone)
102+
if err != nil {
103+
errorf("unable to list SKS clusters: %v", err)
104+
return err
105+
}
106+
99107
for _, ng := range m.nodeGroups {
100-
if _, err := m.client.GetInstancePool(m.ctx, m.zone, ng.Id()); err != nil {
101-
if errors.Is(err, exoapi.ErrNotFound) {
108+
// Check SKS Nodepool existence first
109+
found := false
110+
for _, c := range sksClusters {
111+
for _, np := range c.Nodepools {
112+
if *np.ID == ng.Id() {
113+
if _, err := m.client.GetInstancePool(m.ctx, m.zone, *np.InstancePoolID); err != nil {
114+
if !errors.Is(err, exoapi.ErrNotFound) {
115+
errorf("unable to retrieve SKS Instance Pool %s: %v", ng.Id(), err)
116+
return err
117+
}
118+
} else {
119+
found = true
120+
break
121+
}
122+
}
123+
}
124+
}
125+
126+
if !found {
127+
// If SKS Nodepool is not found, check the Instance Pool
128+
// it was the previous behavior which was less convenient for end user UX
129+
if _, err := m.client.GetInstancePool(m.ctx, m.zone, ng.Id()); err != nil {
130+
if !errors.Is(err, exoapi.ErrNotFound) {
131+
errorf("unable to retrieve SKS Instance Pool %s: %v", ng.Id(), err)
132+
return err
133+
}
134+
135+
// Neither SKS Nodepool nor Instance Pool found, remove it from cache
102136
debugf("removing node group %s from manager cache", ng.Id())
103137
continue
104138
}
105-
errorf("unable to retrieve Instance Pool %s: %v", ng.Id(), err)
106-
return err
107139
}
108140

109141
nodeGroups = append(nodeGroups, ng)

cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,9 @@ func (n *instancePoolNodeGroup) GetOptions(_ config.NodeGroupAutoscalingOptions)
212212

213213
func (n *instancePoolNodeGroup) waitUntilRunning(ctx context.Context) error {
214214
return pollCmd(ctx, func() (bool, error) {
215-
instancePool, err := n.m.client.GetInstancePool(ctx, n.m.zone, n.Id())
215+
instancePool, err := n.m.client.GetInstancePool(ctx, n.m.zone, *n.instancePool.ID)
216216
if err != nil {
217-
errorf("unable to retrieve Instance Pool %s: %s", n.Id(), err)
217+
errorf("unable to retrieve Instance Pool %s: %s", *n.instancePool.ID, err)
218218
return false, err
219219
}
220220

cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ func (n *sksNodepoolNodeGroup) DecreaseTargetSize(_ int) error {
151151

152152
// Id returns an unique identifier of the node group.
153153
func (n *sksNodepoolNodeGroup) Id() string {
154-
return *n.sksNodepool.InstancePoolID
154+
return *n.sksNodepool.ID
155155
}
156156

157157
// Debug returns a string containing all information regarding this node group.
@@ -229,9 +229,9 @@ func (n *sksNodepoolNodeGroup) GetOptions(_ config.NodeGroupAutoscalingOptions)
229229

230230
func (n *sksNodepoolNodeGroup) waitUntilRunning(ctx context.Context) error {
231231
return pollCmd(ctx, func() (bool, error) {
232-
instancePool, err := n.m.client.GetInstancePool(ctx, n.m.zone, n.Id())
232+
instancePool, err := n.m.client.GetInstancePool(ctx, n.m.zone, *n.sksNodepool.InstancePoolID)
233233
if err != nil {
234-
errorf("unable to retrieve Instance Pool %s: %s", n.Id(), err)
234+
errorf("unable to retrieve Instance Pool %s: %s", *n.sksNodepool.InstancePoolID, err)
235235
return false, err
236236
}
237237

cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Id() {
206206
maxSize: int(testComputeInstanceQuotaLimit),
207207
}
208208

209-
ts.Require().Equal(testInstancePoolID, nodeGroup.Id())
209+
ts.Require().Equal(testSKSNodepoolID, nodeGroup.Id())
210210
}
211211

212212
func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Nodes() {

0 commit comments

Comments
 (0)