Skip to content

Commit 9b51a5d

Browse files
committed
categorize transient errors to avoid SLO inflation
1 parent 066b0e8 commit 9b51a5d

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

pkg/csi_driver/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ func extractErrorFromGcsFuseErrorFile(errMsg []byte, err error) (codes.Code, err
507507
}
508508
if strings.Contains(errMsgStr, util.SidecarBucketAccessCheckErrorPrefix) {
509509
if code == codes.Internal {
510-
code = codes.Unavailable // Sidecar bucket access check retries on any failure to connect to the bucket so mark these as Unavailable to avoid SLO false triggers.
510+
code = codes.Unavailable // The sidecar bucket access check retries on any failure to connect to the bucket, and the metadata service setup retries on any failure, so mark these as Unavailable to avoid false SLO triggers.
511511
}
512512
return code, fmt.Errorf("%v", errMsgStr) // Remember the error string already contains SidecarBucketAccessCheckErrorPrefix
513513
}

pkg/sidecar_mounter/sidecar_mounter.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -461,15 +461,15 @@ func (m *Mounter) checkBucketAccessWithRetry(ctx context.Context, tokenSource oa
461461
if ss == nil {
462462
ss, err = m.StorageServiceManager.SetupStorageServiceForSidecar(ctx, tokenSource)
463463
if err != nil {
464-
mc.ErrWriter.WriteMsg(fmt.Sprintf("%q: %q: %v %v, retrying...", util.SidecarBucketAccessCheckErrorPrefix, util.StorageServiceErrorStr, storage.ParseErrCode(err), err))
464+
mc.ErrWriter.WriteMsg(retryableError(fmt.Sprintf("%q: %v %v, retrying...", util.StorageServiceErrorStr, storage.ParseErrCode(err), err)))
465465
return false, nil
466466
}
467467
klog.V(4).Infof("Created storage service %v", ss)
468468
}
469469

470470
if bucketName != "_" {
471471
if exist, err := ss.CheckBucketExists(ctx, &storage.ServiceBucket{Name: bucketName}); !exist {
472-
mc.ErrWriter.WriteMsg(fmt.Sprintf("%q: failed to get GCS bucket %q: %v %v", util.SidecarBucketAccessCheckErrorPrefix, bucketName, storage.ParseErrCode(err), err))
472+
mc.ErrWriter.WriteMsg(retryableError(fmt.Sprintf("failed to get GCS bucket %q: %v %v", bucketName, storage.ParseErrCode(err), err)))
473473
return false, nil
474474
}
475475
klog.V(4).Infof("Bucket access check passed for %s", bucketName)
@@ -515,14 +515,14 @@ func (m *Mounter) SetupTokenAndStorageManager(ctx context.Context, clientset cli
515515
setupTokenAndStorageManagerFunc := func(ctx context.Context) (bool, error) {
516516
meta, err := cpmeta.NewMetadataService(mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
517517
if err != nil {
518-
mc.ErrWriter.WriteMsg(fmt.Sprintf("Failed to setup metadata service: %v for identity pool %s and identity provider %s, retrying....", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider))
518+
mc.ErrWriter.WriteMsg(retryableError(fmt.Sprintf("failed to setup metadata service, got error: %v for identity pool %q and identity provider %q, retrying....", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)))
519519
return false, nil
520520
}
521521

522522
tm = auth.NewTokenManager(meta, clientset)
523523
ssm, err = storage.NewGCSServiceManager()
524524
if err != nil {
525-
mc.ErrWriter.WriteMsg(fmt.Sprintf("Failed to setup storage service manager, got error: %v for identity pool %s and identity provider %s, retrying...", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider))
525+
mc.ErrWriter.WriteMsg(retryableError(fmt.Sprintf("failed to setup storage service manager, got error: %v for identity pool %q and identity provider %q, retrying...", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)))
526526
return false, nil
527527
}
528528
m.TokenManager = tm
@@ -535,7 +535,7 @@ func (m *Mounter) SetupTokenAndStorageManager(ctx context.Context, clientset cli
535535
klog.V(4).Infof("Setup complete for token manager and storage service manager %v and %v", m.TokenManager, m.StorageServiceManager)
536536
return nil
537537
}
538-
return fmt.Errorf("Verify both identity pool and identity provider are provided, got: %s and %s respectively", mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
538+
return errors.New(retryableError(fmt.Sprintf("both identity pool and identity provider must be provided, got: %q and %q respectively", mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)))
539539
}
540540

541541
func fetchIdentityBindingToken(ctx context.Context, k8sSAToken string, identityProvider string) (*oauth2.Token, error) {
@@ -569,3 +569,7 @@ func fetchIdentityBindingToken(ctx context.Context, k8sSAToken string, identityP
569569
Expiry: time.Now().Add(time.Second * time.Duration(stsResponse.ExpiresIn)),
570570
}, nil
571571
}
572+
573+
func retryableError(inputErr string) string {
574+
return fmt.Sprintf("%s: %v", util.SidecarBucketAccessCheckErrorPrefix, inputErr)
575+
}

0 commit comments

Comments
 (0)