Skip to content

Commit 2e4c9fe

Browse files
authored
fix(e2e): retry VMSS creation on GalleryImageNotFound error (#8239)
Signed-off-by: Suraj Deshmukh <suraj.deshmukh@microsoft.com>
1 parent b97dbde commit 2e4c9fe

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

e2e/vmss.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,9 +274,22 @@ func CreateVMSSWithRetry(ctx context.Context, s *Scenario) (*ScenarioVM, error)
 	delay := 5 * time.Second
 	retryOn := func(err error) bool {
 		var respErr *azcore.ResponseError
+		// only retry on Azure API errors with specific error codes
+		if !errors.As(err, &respErr) {
+			return false
+		}
 		// AllocationFailed sometimes happens for exotic SKUs (new GPUs) with limited availability, sometimes retrying helps
 		// It's not a quota issue
-		return errors.As(err, &respErr) && respErr.StatusCode == 200 && respErr.ErrorCode == "AllocationFailed"
+		if respErr.StatusCode == 200 && respErr.ErrorCode == "AllocationFailed" {
+			return true
+		}
+		// GalleryImageNotFound can happen transiently after image replication completes
+		// due to Azure eventual consistency - the gallery API reports success but the
+		// compute fabric in the target region hasn't fully propagated the image yet
+		if respErr.StatusCode == 404 && respErr.ErrorCode == "GalleryImageNotFound" {
+			return true
+		}
+		return false
 	}
 
 	maxAttempts := 10

0 commit comments

Comments
 (0)