Skip to content

Commit 0ded273

Browse files
authored
Merge pull request #1261 from Sneha-at/bug_fix
Add retry to metadataservice client creation
2 parents 4669639 + 066b0e8 commit 0ded273

File tree

3 files changed

+44
-25
lines changed

3 files changed

+44
-25
lines changed

cmd/sidecar_mounter/main.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,15 @@ func main() {
9595
}
9696
if mc != nil {
9797
if mc.EnableSidecarBucketAccessCheck {
98-
tm, ssm, err := mounter.SetupTokenAndStorageManager(nil /*k8sClientset*/, mc)
99-
if err != nil {
100-
klog.Fatalf("Failed to fetch identity pool and identity provider details required for bucket access check, got error %v", err)
101-
}
102-
mounter.TokenManager = tm
103-
mounter.StorageServiceManager = ssm
10498
mc.SidecarRetryConfig.Cap = *storageServiceAndBucketAccessCap
10599
mc.SidecarRetryConfig.Steps = *storageServiceAndBucketAccessSteps
106100
mc.SidecarRetryConfig.Factor = *storageServiceAndBucketAccessFactor
107101
mc.SidecarRetryConfig.Duration = *storageServiceAndBucketAccessDuration
108102
mc.SidecarRetryConfig.Jitter = *storageServiceAndBucketAccessJitter
103+
err := mounter.SetupTokenAndStorageManager(ctx, nil /*k8sClientset*/, mc)
104+
if err != nil {
105+
klog.Fatalf("Failed to fetch identity pool and identity provider details required for bucket access check, got error %v", err)
106+
}
109107
}
110108
if err := mounter.Mount(ctx, mc); err != nil {
111109
mc.ErrWriter.WriteMsg(fmt.Sprintf("failed to mount bucket %q for volume %q: %v\n", mc.BucketName, mc.VolumeName, err))

pkg/csi_driver/utils.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,9 @@ func extractErrorFromGcsFuseErrorFile(errMsg []byte, err error) (codes.Code, err
509509
code = codes.Unauthenticated
510510
}
511511
if strings.Contains(errMsgStr, util.SidecarBucketAccessCheckErrorPrefix) {
512+
if code == codes.Internal {
513+
code = codes.Unavailable // Sidecar bucket access check retries on any failure to connect to the bucket so mark these as Unavailable to avoid SLO false triggers.
514+
}
512515
return code, fmt.Errorf("%v", errMsgStr) // Remember the error string already contains SidecarBucketAccessCheckErrorPrefix
513516
}
514517
return code, fmt.Errorf("gcsfuse failed with error: %v", errMsgStr)

pkg/sidecar_mounter/sidecar_mounter.go

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func (m *Mounter) Mount(ctx context.Context, mc *MountConfig) error {
9797
}
9898
}
9999
if mc.EnableSidecarBucketAccessCheck {
100-
err := m.checkBucketAccessWithRetry(ctx, m.StorageServiceManager, tokenSource, m.TokenManager, mc.BucketName, mc.TokenServerIdentityProvider, mc)
100+
err := m.checkBucketAccessWithRetry(ctx, tokenSource, mc.BucketName, mc.TokenServerIdentityProvider, mc)
101101
if err != nil {
102102
return status.Errorf(codes.Unauthenticated, "failed to prepare storage service or check bucket access, failed with error: %v", err)
103103
}
@@ -439,18 +439,25 @@ func StartTokenServer(ctx context.Context, tokenURLBasePath, tokenSocketName str
439439
}
440440
}
441441

442-
// checkBucketAccessWithRetry prepares the GCS Storage Service using the Kubernetes Service Account from VolumeContext and validates bucket access.
443-
func (m *Mounter) checkBucketAccessWithRetry(ctx context.Context, storageServiceManager storage.ServiceManager, tokenSource oauth2.TokenSource, tm auth.TokenManager, bucketName string, tokenProvider string, mc *MountConfig) error {
442+
func (m *Mounter) executeFuncWithRetry(ctx context.Context, mc *MountConfig, f func(ctx context.Context) (bool, error)) error {
444443
backoff := wait.Backoff{
445444
Duration: mc.SidecarRetryConfig.Duration,
446445
Factor: mc.SidecarRetryConfig.Factor,
447446
Cap: mc.SidecarRetryConfig.Cap,
448447
Steps: mc.SidecarRetryConfig.Steps,
449448
Jitter: mc.SidecarRetryConfig.Jitter, // Adds randomness, this will give +/- 10% of the current delay
450449
}
450+
err := wait.ExponentialBackoffWithContext(ctx, backoff, f)
451+
if err != nil {
452+
return fmt.Errorf("operation failed after retries: %w", err)
453+
}
454+
return nil
455+
}
451456

452-
var ss storage.Service
457+
// checkBucketAccessWithRetry prepares the GCS Storage Service using the Kubernetes Service Account from VolumeContext and validates bucket access.
458+
func (m *Mounter) checkBucketAccessWithRetry(ctx context.Context, tokenSource oauth2.TokenSource, bucketName string, tokenProvider string, mc *MountConfig) error {
453459
var err error
460+
var ss storage.Service
454461
ssCreateAndBucketCheckFunc := func(ctx context.Context) (bool, error) {
455462
if ss == nil {
456463
ss, err = m.StorageServiceManager.SetupStorageServiceForSidecar(ctx, tokenSource)
@@ -476,11 +483,9 @@ func (m *Mounter) checkBucketAccessWithRetry(ctx context.Context, storageService
476483
}
477484
}
478485

479-
err = wait.ExponentialBackoffWithContext(ctx, backoff, ssCreateAndBucketCheckFunc)
480-
if err != nil {
481-
return fmt.Errorf("bucket access check failed after retries: %w", err)
486+
if err := m.executeFuncWithRetry(ctx, mc, ssCreateAndBucketCheckFunc); err != nil {
487+
return err
482488
}
483-
klog.V(4).Infof("Completed access check for bucket %s", bucketName)
484489
return nil
485490
}
486491

@@ -504,21 +509,34 @@ func getAudienceFromContextAndIdentityProvider(ctx context.Context, identityProv
504509
return identityProvider, nil
505510
}
506511

507-
func (m *Mounter) SetupTokenAndStorageManager(clientset clientset.Interface, mc *MountConfig) (auth.TokenManager, storage.ServiceManager, error) {
512+
func (m *Mounter) SetupTokenAndStorageManager(ctx context.Context, clientset clientset.Interface, mc *MountConfig) error {
508513
if mc.TokenServerIdentityPool != "" && mc.TokenServerIdentityProvider != "" {
509-
meta, err := cpmeta.NewMetadataService(mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
510-
if err != nil {
511-
return nil, nil, fmt.Errorf("Failed to set up metadata service: %v for identity pool %s and identity provider %s", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
512-
}
514+
var tm auth.TokenManager
515+
var ssm storage.ServiceManager
516+
setupTokenAndStorageManagerFunc := func(ctx context.Context) (bool, error) {
517+
meta, err := cpmeta.NewMetadataService(mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
518+
if err != nil {
519+
mc.ErrWriter.WriteMsg(fmt.Sprintf("Failed to setup metadata service: %v for identity pool %s and identity provider %s, retrying....", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider))
520+
return false, nil
521+
}
513522

514-
tm := auth.NewTokenManager(meta, clientset)
515-
ssm, err := storage.NewGCSServiceManager()
516-
if err != nil {
517-
return nil, nil, fmt.Errorf("Failed to set up storage service manager, got error: %v for identity pool %s and identity provider %s", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
523+
tm = auth.NewTokenManager(meta, clientset)
524+
ssm, err = storage.NewGCSServiceManager()
525+
if err != nil {
526+
mc.ErrWriter.WriteMsg(fmt.Sprintf("Failed to setup storage service manager, got error: %v for identity pool %s and identity provider %s, retrying...", err, mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider))
527+
return false, nil
528+
}
529+
m.TokenManager = tm
530+
m.StorageServiceManager = ssm
531+
return true, nil
518532
}
519-
return tm, ssm, nil
533+
if err := m.executeFuncWithRetry(ctx, mc, setupTokenAndStorageManagerFunc); err != nil {
534+
return err
535+
}
536+
klog.V(4).Infof("Setup complete for token manager and storage service manager %v and %v", m.TokenManager, m.StorageServiceManager)
537+
return nil
520538
}
521-
return nil, nil, fmt.Errorf("Either of identity-pool %s or identity-provider %s were not provided", mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
539+
return fmt.Errorf("Verify both identity pool and identity provider are provided, got: %s and %s respectively", mc.TokenServerIdentityPool, mc.TokenServerIdentityProvider)
522540
}
523541

524542
func fetchIdentityBindingToken(ctx context.Context, k8sSAToken string, identityProvider string) (*oauth2.Token, error) {

0 commit comments

Comments
 (0)