Skip to content

Commit f9e94bb

Browse files
committed
eks/cluster/version-upgrade: retry server version fetch
Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
1 parent ab832ea commit f9e94bb

File tree

1 file changed

+31
-10
lines changed

1 file changed

+31
-10
lines changed

eks/cluster/version-upgrade/version-upgrade.go

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func (ts *tester) Create() (err error) {
8080

8181
initialWait := 3 * time.Minute
8282

83-
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
83+
ctx, cancel := context.WithTimeout(context.Background(), 45*time.Minute)
8484
ch := Poll(
8585
ctx,
8686
ts.cfg.Stopc,
@@ -100,19 +100,40 @@ func (ts *tester) Create() (err error) {
100100
return err
101101
}
102102

103-
ts.cfg.EKSConfig.Status.ServerVersionInfo, err = ts.cfg.K8SClient.FetchServerVersion()
103+
// the last master instance running the old cluster version may take a while to shut down, so retry the version check until the server reports the target version
104+
waitDur, retryStart := 5*time.Minute, time.Now()
105+
for time.Now().Sub(retryStart) < waitDur {
106+
select {
107+
case <-ts.cfg.Stopc:
108+
ts.cfg.Logger.Warn("version check aborted")
109+
return nil
110+
case <-time.After(5 * time.Second):
111+
}
112+
113+
ts.cfg.EKSConfig.Status.ServerVersionInfo, err = ts.cfg.K8SClient.FetchServerVersion()
114+
if err != nil {
115+
ts.cfg.Logger.Warn("failed to fetch server version", zap.Error(err))
116+
continue
117+
}
118+
119+
ts.cfg.EKSConfig.Sync()
120+
cur := fmt.Sprintf("%.2f", ts.cfg.EKSConfig.Status.ServerVersionInfo.VersionValue)
121+
target := fmt.Sprintf("%.2f", ts.cfg.EKSConfig.AddOnClusterVersionUpgrade.VersionValue)
122+
123+
ts.cfg.Logger.Info("fetched version", zap.String("current", cur), zap.String("target", target))
124+
if cur != target {
125+
err = fmt.Errorf("EKS server version after upgrade expected %q, got %q [%+v]", target, cur, ts.cfg.EKSConfig.Status.ServerVersionInfo)
126+
continue
127+
}
128+
129+
err = nil
130+
break
131+
}
104132
if err != nil {
105133
return err
106134
}
107-
ts.cfg.EKSConfig.Sync()
108-
109-
cur := fmt.Sprintf("%.2f", ts.cfg.EKSConfig.Status.ServerVersionInfo.VersionValue)
110-
target := fmt.Sprintf("%.2f", ts.cfg.EKSConfig.AddOnClusterVersionUpgrade.VersionValue)
111-
if cur != target {
112-
return fmt.Errorf("EKS server version after upgrade expected %q, got %q [%+v]", target, cur, ts.cfg.EKSConfig.Status.ServerVersionInfo)
113-
}
114135

115-
waitDur, retryStart := 5*time.Minute, time.Now()
136+
waitDur, retryStart = 5*time.Minute, time.Now()
116137
for time.Now().Sub(retryStart) < waitDur {
117138
select {
118139
case <-ts.cfg.Stopc:

0 commit comments

Comments
 (0)