Skip to content

Commit 107ab7f

Browse files
committed
feat(ibmcloud): zone-level system type discovery with retry on capacity
1 parent 28e55f7 commit 107ab7f

3 files changed

Lines changed: 198 additions & 215 deletions

File tree

pkg/provider/ibmcloud/action/ibm-power/ibm-power.go

Lines changed: 86 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -120,50 +120,103 @@ func New(ctx *mc.ContextArgs, args *PWArgs) error {
120120

121121
prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main")
122122

123-
sysTypeResult, err := icdata.SelectSystemType(mCtx, &icdata.SystemPoolRequirements{
123+
sysTypes, err := icdata.GetAvailableSystemTypes(mCtx, &icdata.SystemTypeRequirements{
124124
CloudInstanceId: args.WorkspaceID,
125-
Memory: args.Memory,
126-
Processors: args.Processors,
125+
Zone: os.Getenv("IC_ZONE"),
126+
ProcType: args.ProcType,
127127
PreferredType: args.SysType,
128128
})
129129
if err != nil {
130-
return fmt.Errorf("system type selection failed: %w", err)
131-
}
132-
if !sysTypeResult.IsPreferred {
133-
logging.Warnf("using system type %s instead of requested %s due to capacity constraints",
134-
sysTypeResult.SelectedType, args.SysType)
135-
}
136-
137-
r := &pwRequest{
138-
mCtx: mCtx,
139-
prefix: &prefix,
140-
piPrivateSubnetID: args.PIPrivateSubnetID,
141-
workspaceID: args.WorkspaceID,
142-
vpcPublicSubnetID: args.VPCPublicSubnetID,
143-
memory: args.Memory,
144-
processors: args.Processors,
145-
procType: args.ProcType,
146-
sysType: sysTypeResult.SelectedType,
147-
storageType: args.StorageType,
148-
diskSize: args.DiskSize,
149-
otelAppCode: args.OtelAppCode,
150-
otelAuthToken: args.OtelAuthToken,
151-
otelEndpoint: args.OtelEndpoint,
152-
otelIndex: args.OtelIndex,
153-
otelExtraAttrs: args.OtelExtraAttrs,
130+
return fmt.Errorf("system type discovery failed: %w", err)
154131
}
132+
133+
var lastErr error
134+
for i, sysType := range sysTypes.Types {
135+
if i > 0 {
136+
logging.Warnf("retrying with system type %s (%d/%d) after capacity failure",
137+
sysType, i+1, len(sysTypes.Types))
138+
}
139+
140+
r := &pwRequest{
141+
mCtx: mCtx,
142+
prefix: &prefix,
143+
piPrivateSubnetID: args.PIPrivateSubnetID,
144+
workspaceID: args.WorkspaceID,
145+
vpcPublicSubnetID: args.VPCPublicSubnetID,
146+
memory: args.Memory,
147+
processors: args.Processors,
148+
procType: args.ProcType,
149+
sysType: sysType,
150+
storageType: args.StorageType,
151+
diskSize: args.DiskSize,
152+
otelAppCode: args.OtelAppCode,
153+
otelAuthToken: args.OtelAuthToken,
154+
otelEndpoint: args.OtelEndpoint,
155+
otelIndex: args.OtelIndex,
156+
otelExtraAttrs: args.OtelExtraAttrs,
157+
}
158+
cs := manager.Stack{
159+
StackName: mCtx.StackNameByProject(stackIBMPowerVS),
160+
ProjectName: mCtx.ProjectName(),
161+
BackedURL: mCtx.BackedURL(),
162+
ProviderCredentials: ibmcloudp.DefaultCredentials,
163+
DeployFunc: r.deploy,
164+
}
165+
sr, err := manager.UpStack(r.mCtx, cs)
166+
if err == nil {
167+
if i > 0 {
168+
logging.Infof("provisioning succeeded with system type %s (attempt %d)", sysType, i+1)
169+
}
170+
return manageResults(mCtx, sr, prefix, r.vpcPublicSubnetID != "")
171+
}
172+
173+
lastErr = err
174+
if !isCapacityError(err) {
175+
return fmt.Errorf("stack creation failed: %w", err)
176+
}
177+
178+
logging.Warnf("capacity error with system type %s: %v", sysType, err)
179+
180+
if i < len(sysTypes.Types)-1 {
181+
logging.Infof("destroying partial stack before retry...")
182+
if dErr := destroyForRetry(mCtx); dErr != nil {
183+
logging.Warnf("failed to destroy partial stack: %v", dErr)
184+
}
185+
}
186+
}
187+
188+
return fmt.Errorf("all system types exhausted; last error: %w", lastErr)
189+
}
190+
191+
// isCapacityError reports whether err looks like a capacity-exhaustion
// failure. The decision is made purely on the error text: the message is
// lower-cased and checked for any of a fixed set of known phrases. A nil
// error is never a capacity error.
func isCapacityError(err error) bool {
	if err == nil {
		return false
	}

	// Lower-case once so every phrase comparison is case-insensitive.
	msg := strings.ToLower(err.Error())

	// Case expressions are evaluated in order and stop at the first match.
	switch {
	case strings.Contains(msg, "insufficient resources"),
		strings.Contains(msg, "no available host"),
		strings.Contains(msg, "capacity is not available"),
		strings.Contains(msg, "not enough resources"),
		strings.Contains(msg, "resource capacity"),
		strings.Contains(msg, "no hosts available"),
		strings.Contains(msg, "maximum capacity"):
		return true
	}
	return false
}
211+
212+
// destroyForRetry builds a manager.Stack description for the Power VS stack
// (stack name, project name, backend URL and default IBM Cloud credentials
// taken from mCtx) and returns the result of manager.DestroyStack on it.
// New calls it after a capacity error, before trying the next system type.
//
// NOTE(review): in this diff rendering, the lines that follow a marker ending
// in "-" are removed lines from the old tail of New; they are not part of
// this function in the post-commit file.
func destroyForRetry(mCtx *mc.Context) error {
155213
cs := manager.Stack{
156214
StackName: mCtx.StackNameByProject(stackIBMPowerVS),
157215
ProjectName: mCtx.ProjectName(),
158216
BackedURL: mCtx.BackedURL(),
159217
ProviderCredentials: ibmcloudp.DefaultCredentials,
160-
DeployFunc: r.deploy,
161-
}
162-
sr, err := manager.UpStack(r.mCtx, cs)
163-
if err != nil {
164-
return fmt.Errorf("stack creation failed: %w", err)
165218
}
166-
return manageResults(mCtx, sr, prefix, r.vpcPublicSubnetID != "")
219+
return manager.DestroyStack(mCtx, cs)
167220
}
168221

169222
// Destroy tears down the Power VS stack identified by mCtxArgs.

pkg/provider/ibmcloud/data/pisystempools.go

Lines changed: 0 additions & 182 deletions
This file was deleted.

0 commit comments

Comments
 (0)