Skip to content

Commit 6bb8719

Browse files
authored
Handle timeout errors for CL2 large tests (#166)
1 parent fd5eb04 commit 6bb8719

File tree

1 file changed

+14
-3
lines changed
  • eks/cluster-loader/clusterloader2

1 file changed

+14
-3
lines changed

eks/cluster-loader/clusterloader2/addon.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ import (
1515
"go.uber.org/zap"
1616
"go.uber.org/zap/zapcore"
1717
v1 "k8s.io/api/batch/v1"
18+
"k8s.io/apimachinery/pkg/api/errors"
1819
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20+
"k8s.io/client-go/util/retry"
1921
)
2022

2123
// IndentedNewline covers formatting issues with gotemplates
@@ -69,19 +71,28 @@ func (c *ClusterLoader) Apply() (err error) {
6971

7072
// Wait for job to complete -- 2 hours because larger tests take a very long time.
7173
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Hour)
74+
defer cancel()
7275
job := &v1.Job{}
7376
for job.Status.Succeeded < 1 {
7477
job, err = c.K8sClient.KubernetesClientSet().
7578
BatchV1().
7679
Jobs("clusterloader2").
7780
Get(ctx, "clusterloader2", metav1.GetOptions{})
7881
if err != nil {
79-
cancel()
80-
return fmt.Errorf("failed to get cl2 job (%v)", err)
82+
if errors.IsTimeout(err) {
83+
err = retry.OnError(retry.DefaultRetry, errors.IsTimeout, func() error {
84+
job, err = c.K8sClient.KubernetesClientSet().
85+
BatchV1().
86+
Jobs("clusterloader2").
87+
Get(ctx, "clusterloader2", metav1.GetOptions{})
88+
return fmt.Errorf("failed to get cl2 job (%v)", err)
89+
})
90+
} else if !errors.IsTimeout(err) {
91+
return fmt.Errorf("failed to get cl2 job (%v)", err)
92+
}
8193
}
8294
time.Sleep(10 * time.Second)
8395
}
84-
cancel()
8596
return nil
8697
}
8798

0 commit comments

Comments
 (0)