Skip to content

Commit a35c610

Browse files
Add generic retry logic for exec commands
1 parent 6935995 commit a35c610

1 file changed

Lines changed: 59 additions & 11 deletions

File tree

test/e2e/specs/specs.go

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ const (
107107
backoffJitter = 0.1
108108
)
109109

110+
// Note to developers adding new testing methods - Please check the code path of newly added methods and ensure that those requiring
111+
// konnectivity agents are wrapped with retry logic; see `runKubectlWithFullOutputWithRetry` for an example.
112+
// See here for the list of commands that require the agents - go/konnectivity-network-proxy#egress_traffic.
113+
110114
type TestPod struct {
111115
client clientset.Interface
112116
pod *corev1.Pod
@@ -242,27 +246,54 @@ func (t *TestPod) VerifyExecInPodFail(f *framework.Framework, containerName, shE
242246

243247
// execCommandInContainerWithFullOutputWithRetry executes a command in a target pod and retries with gradual back until timeout(10 min) or success.
244248
func execCommandInContainerWithFullOutputWithRetry(f *framework.Framework, podName, containerName string, cmd ...string) (string, string, error) {
249+
return RetryWithBackoffTwoReturnValues(func() (string, string, error) {
250+
return e2epod.ExecCommandInContainerWithFullOutput(f, podName, containerName, cmd...)
251+
})
252+
}
245253

254+
// Retry executes a generic operation (op) with exponential backoff.
255+
// T can be any type (string, a struct, a slice, etc).
256+
func Retry[T any](op func() (T, error)) (T, error) {
246257
backoff := wait.Backoff{
247258
Duration: backoffDuration,
248259
Factor: backoffFactor,
249260
Cap: backoffCap,
250261
Steps: backoffSteps,
251262
Jitter: backoffJitter,
252263
}
253-
var err error
254-
var stdout, stderr string
264+
265+
var result T
266+
var lastErr error
255267

256268
wait.ExponentialBackoff(backoff, func() (bool, error) {
257-
stdout, stderr, err = e2epod.ExecCommandInContainerWithFullOutput(f, podName, containerName, cmd...)
258-
if err != nil {
259-
framework.Logf("Exec command failed with error: %v. Retrying...", err)
269+
result, lastErr = op()
270+
271+
if lastErr != nil {
272+
framework.Logf("Operation failed with error: %v. Retrying...", lastErr)
260273
return false, nil
261274
}
262275
return true, nil
263276
})
264277

265-
return stdout, stderr, err
278+
if lastErr != nil {
279+
framework.Logf("Operation failed after %d steps (total time/cap: %v). Last error: %v", backoff.Steps, backoff.Cap, lastErr)
280+
} else {
281+
framework.Logf("Operation succeeded.")
282+
}
283+
284+
return result, lastErr
285+
}
286+
287+
func RetryWithBackoffOneReturnValue(op func() (string, error)) (string, error) {
288+
return Retry(op)
289+
}
290+
291+
func RetryWithBackoffTwoReturnValues(op func() (string, string, error)) (string, string, error) {
292+
res, err := Retry(func() ([2]string, error) {
293+
stdout, stderr, err := op()
294+
return [2]string{stdout, stderr}, err
295+
})
296+
return res[0], res[1], err
266297
}
267298

268299
func (t *TestPod) WaitForRunning(ctx context.Context) {
@@ -316,10 +347,16 @@ func (t *TestPod) WaitForPodNotFoundInNamespace(ctx context.Context) {
316347
}
317348

318349
// WaitForLog looks for expectedString in the logs of the given container of the test pod,
// retrying the lookup on failure, and fails the test if the lookup still returns an error.
func (t *TestPod) WaitForLog(ctx context.Context, container string, expectedString string) {
	ns, podName := t.namespace.Name, t.pod.Name
	// Only the error matters here; the matched log output is discarded.
	_, err := lookForStringInLogWithoutKubectlWithRetry(ctx, t.client, ns, podName, container, expectedString, pollTimeout)
	framework.ExpectNoError(err)
}
322353

354+
func lookForStringInLogWithoutKubectlWithRetry(ctx context.Context, client clientset.Interface, namespace, podName, container, expectedString string, timeout time.Duration) (string, error) {
355+
return RetryWithBackoffOneReturnValue(func() (string, error) {
356+
return e2epodooutput.LookForStringInLogWithoutKubectl(ctx, client, namespace, podName, container, expectedString, timeout)
357+
})
358+
}
359+
323360
func (t *TestPod) CheckSidecarNeverTerminatedAfterAWhile(ctx context.Context, isNativeSidecar bool) {
324361
time.Sleep(pollTimeout)
325362

@@ -1271,15 +1308,15 @@ func GetGCSFuseVersion(ctx context.Context, f *framework.Framework) string {
12711308
}
12721309

12731310
func DeployIstioSidecar(namespace string) {
1274-
e2ekubectl.RunKubectlOrDie(namespace, "apply", "--filename", "./specs/istio-sidecar.yaml")
1311+
runKubectlOrDie(namespace, "apply", "--filename", "./specs/istio-sidecar.yaml")
12751312
}
12761313

12771314
func DeployIstioServiceEntry(namespace string) {
1278-
e2ekubectl.RunKubectlOrDie(namespace, "apply", "--filename", "./specs/istio-service-entry.yaml")
1315+
runKubectlOrDie(namespace, "apply", "--filename", "./specs/istio-service-entry.yaml")
12791316
}
12801317

12811318
func (t *TestPod) VerifyDefaultingFlagsArePassed(namespace string, expectedMachineTypeFlag string, expectedDisableAutoconfigFlag bool) {
1282-
stdout, stderr, err := e2ekubectl.RunKubectlWithFullOutput(namespace, "logs", t.pod.Name, "-c", "gke-gcsfuse-sidecar")
1319+
stdout, stderr, err := runKubectlWithFullOutputWithRetry(namespace, "logs", t.pod.Name, "-c", "gke-gcsfuse-sidecar")
12831320
framework.ExpectNoError(err,
12841321
"Error accessing logs from pod %v, but failed with error message %q\nstdout: %s\nstderr: %s",
12851322
t.pod.Name, err, stdout, stderr)
@@ -1294,7 +1331,7 @@ func (t *TestPod) VerifyDefaultingFlagsArePassed(namespace string, expectedMachi
12941331
}
12951332

12961333
func (t *TestPod) VerifyProfileFlagsAreNotPassed(namespace string) {
1297-
stdout, stderr, err := e2ekubectl.RunKubectlWithFullOutput(namespace, "logs", t.pod.Name, "-c", "gke-gcsfuse-sidecar")
1334+
stdout, stderr, err := runKubectlWithFullOutputWithRetry(namespace, "logs", t.pod.Name, "-c", "gke-gcsfuse-sidecar")
12981335
framework.ExpectNoError(err,
12991336
"Error accessing logs from pod %v, but failed with error message %q\nstdout: %s\nstderr: %s",
13001337
t.pod.Name, err, stdout, stderr)
@@ -1307,3 +1344,14 @@ func (t *TestPod) VerifyProfileFlagsAreNotPassed(namespace string) {
13071344
gomega.Expect(stdout).To(gomega.Not(gomega.MatchRegexp(`map\[.*profile:aiml-training.*\]`)),
13081345
"Should NOT find 'profile:aiml-training' within the gcsfuse config file content map, but it was found.")
13091346
}
1347+
1348+
func runKubectlOrDie(namespace string, args ...string) {
1349+
_, _, err := runKubectlWithFullOutputWithRetry(namespace, args...)
1350+
framework.ExpectNoError(err)
1351+
}
1352+
1353+
func runKubectlWithFullOutputWithRetry(namespace string, args ...string) (string, string, error) {
1354+
return RetryWithBackoffTwoReturnValues(func() (string, string, error) {
1355+
return e2ekubectl.RunKubectlWithFullOutput(namespace, args...)
1356+
})
1357+
}

0 commit comments

Comments
 (0)