Skip to content

Commit 8f5ae8d

Browse files
committed
Add retries to tests when waiting on assuming new role via EKS Pod Identity
Signed-off-by: Daniel Carl Jones <djonesoa@amazon.com>
1 parent 87be012 commit 8f5ae8d

1 file changed

Lines changed: 9 additions & 5 deletions

File tree

tests/e2e-kubernetes/testsuites/credentials.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,14 @@ const (
5959
)
6060

6161
const (
62-
eksauthAssumeRoleRetryCode = "AccessDeniedException"
6362
eksauthAssumeRoleRetryMaxAttempts = 0 // This will cause the SDK to retry indefinitely, but we do have a timeout on the operation
6463
eksauthAssumeRoleRetryMaxBackoffDelay = 10 * time.Second
6564
)
6665

66+
var (
67+
eksauthAssumeRoleRetryErrorCodes = []string{"AccessDeniedException", "ResourceNotFoundException"}
68+
)
69+
6770
const (
6871
iamListAttachedRolePoliciesTimeout = 1 * time.Minute
6972
iamListAttachedRolePoliciesPolling = 5 * time.Second
@@ -1140,9 +1143,10 @@ func assumeRole(ctx context.Context, f *framework.Framework, roleArn string) *st
11401143
})
11411144
}
11421145

1143-
// waitUntilRoleIsAssumable waits until the given role is assumable.
1146+
// Waits until the given role is assumable.
1147+
//
11441148
// This is needed because we're creating new roles in our test cases and then trying to assume those roles,
1145-
// but there is a delay between IAM and STS services and newly created roles/policies does not appear on STS immediately.
1149+
// but there's a delay between IAM and the token service (STS or EKS Auth) resulting in errors such as "access denied" or "not found".
11461150
func waitUntilRoleIsAssumable[Input any, Output any, O any](
11471151
ctx context.Context,
11481152
assumeFunc func(context.Context, *Input, ...func(O)) (*Output, error),
@@ -1177,7 +1181,7 @@ func waitUntilRoleIsAssumableEKS[Input any, Output any](
11771181
input *Input,
11781182
) *Output {
11791183
return waitUntilRoleIsAssumable(ctx, assumeFunc, input, func(o *eksauth.Options) {
1180-
o.Retryer = retry.AddWithErrorCodes(o.Retryer, eksauthAssumeRoleRetryCode)
1184+
o.Retryer = retry.AddWithErrorCodes(o.Retryer, eksauthAssumeRoleRetryErrorCodes...)
11811185
o.Retryer = retry.AddWithMaxAttempts(o.Retryer, eksauthAssumeRoleRetryMaxAttempts)
11821186
o.Retryer = retry.AddWithMaxBackoffDelay(o.Retryer, eksauthAssumeRoleRetryMaxBackoffDelay)
11831187
})
@@ -1205,7 +1209,7 @@ func waitUntilRoleIsAssumableWithWebIdentity(ctx context.Context, f *framework.F
12051209
}
12061210

12071211
func waitUntilRoleIsAssumableWithEKS(ctx context.Context, f *framework.Framework, sa *v1.ServiceAccount, pod *v1.Pod) {
1208-
// If you're seeing the following error, then it means you've made a typo in the cluster name when running the tests!
1212+
// If you see the following error, it may mean you've made a typo in the cluster name or the role is being assumed too quickly.
12091213
// [FAILED] operation error EKS Auth: AssumeRoleForPodIdentity, https response error StatusCode: 404, RequestID:
12101214
// ResourceNotFoundException: The token included in the request has no service account role association for it.
12111215

0 commit comments

Comments
 (0)