Skip to content

Commit 8efbc96

Browse files
authored
Bump retries & fix context timeouts (#131)
* Bump built-in retries to 20. The default is 3, with a max back-off time of 20s between attempts: https://aws.github.io/aws-sdk-go-v2/docs/configuring-sdk/retries-timeouts/#standard-retryer
* NewContext should actually be new. It shouldn't be derived from the previous context, because it then inherits that context's deadline and merely tries to layer an additional one on top. That can never work: the old deadline is always the initial timeout set at launch, so we aren't actually renewing a timer at all. By creating a brand-new context every time and throwing the old one away, we actually get a fresh timer.
* Remove double-sleep.
1 parent 92f1b01 commit 8efbc96

File tree

12 files changed

+63
-35
lines changed

12 files changed

+63
-35
lines changed

aws/aws.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import (
2121
"strconv"
2222
"time"
2323

24+
"github.com/aws/aws-sdk-go-v2/aws"
25+
"github.com/aws/aws-sdk-go-v2/aws/retry"
2426
"github.com/aws/aws-sdk-go-v2/config"
2527
"github.com/aws/aws-sdk-go-v2/service/autoscaling"
2628
at "github.com/aws/aws-sdk-go-v2/service/autoscaling/types"
@@ -44,7 +46,12 @@ func GetAWSClients(ctx context.Context) (*Clients, error) {
4446
region = "us-east-1"
4547
}
4648

47-
cfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(region))
49+
cfg, err := config.LoadDefaultConfig(ctx,
50+
config.WithRegion(region),
51+
config.WithRetryer(func() aws.Retryer {
52+
return retry.AddWithMaxAttempts(retry.NewStandard(), 20)
53+
}),
54+
)
4855
if err != nil {
4956
return nil, errors.Wrap(err, "Error opening default AWS config")
5057
}

bouncer/runner.go

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ const (
5151

5252
asgSeparator = ","
5353
desiredCapSeparator = ":"
54+
55+
debugTimeFormat = "2006-01-02 15:04:05 MST"
5456
)
5557

5658
// NewBaseRunner instantiates a BaseRunner
@@ -189,23 +191,32 @@ func (r *BaseRunner) SetDesiredCapacity(ctx context.Context, asg *ASG, desiredCa
189191
}
190192

191193
// NewContext generates a fresh context with the ItemTimeout, independent of any previous context's deadline
192-
func (r *BaseRunner) NewContext(ctx context.Context) (context.Context, context.CancelFunc) {
193-
return context.WithTimeout(ctx, r.Opts.ItemTimeout)
194-
}
194+
func (r *BaseRunner) NewContext() (context.Context, context.CancelFunc) {
195+
ctx, cancel := context.WithTimeout(context.Background(), r.Opts.ItemTimeout)
196+
dn, _ := ctx.Deadline()
195197

196-
// ResetAndSleep resets our context timer (because we just performed a mutation action), and then sleeps
197-
func (r *BaseRunner) ResetAndSleep(ctx context.Context) (context.Context, context.CancelFunc) {
198-
log.Debugf("Resetting timer")
198+
l := log.WithFields(log.Fields{
199+
"Context deadline": dn.Format(debugTimeFormat),
200+
"Current time": getHumanCurrentTime(),
201+
})
199202

200-
ctx, cancel := r.NewContext(ctx)
201-
r.Sleep(ctx)
203+
l.Debug("Generating fresh context")
202204

203205
return ctx, cancel
204206
}
205207

208+
func getHumanCurrentTime() string {
209+
return time.Now().Format(debugTimeFormat)
210+
}
211+
206212
// Sleep makes us sleep for the constant time - call this when waiting for an AWS change
207213
func (r *BaseRunner) Sleep(ctx context.Context) {
208-
log.Debugf("Sleeping for %v", waitBetweenChecks)
214+
l := log.WithFields(log.Fields{
215+
"Sleep Duration": waitBetweenChecks,
216+
"Current time": getHumanCurrentTime(),
217+
})
218+
219+
l.Debug("Sleeping between checks")
209220

210221
select {
211222
case <-time.After(waitBetweenChecks):

canary/runner.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ func (r *Runner) ValidatePrereqs(ctx context.Context) error {
9090
}
9191

9292
// Run has the meat of the batch job
93-
func (r *Runner) Run(ctx context.Context) error {
93+
func (r *Runner) Run() error {
9494
var newDesiredCapacity int32
9595

96-
ctx, cancel := r.NewContext(ctx)
96+
ctx, cancel := r.NewContext()
9797
defer cancel()
9898

9999
for {
@@ -161,8 +161,9 @@ func (r *Runner) Run(ctx context.Context) error {
161161
return errors.Wrap(err, "error killing instance")
162162
}
163163
}
164-
ctx, cancel = r.ResetAndSleep(ctx)
164+
ctx, cancel = r.NewContext()
165165
defer cancel()
166+
r.Sleep(ctx)
166167

167168
continue
168169
}
@@ -202,8 +203,9 @@ func (r *Runner) Run(ctx context.Context) error {
202203
return errors.Wrap(err, "error setting desired capacity")
203204
}
204205

205-
ctx, cancel = r.ResetAndSleep(ctx)
206+
ctx, cancel = r.NewContext()
206207
defer cancel()
208+
r.Sleep(ctx)
207209

208210
continue
209211
}

cmd/canary.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ var canaryCmd = &cobra.Command{
7979
log.Fatal(err)
8080
}
8181

82-
err = r.Run(ctx)
82+
err = r.Run()
8383
if err != nil {
8484
log.Fatal(errors.Wrap(err, "error in run"))
8585
}

cmd/full.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ var fullCmd = &cobra.Command{
8080
log.Fatal(err)
8181
}
8282

83-
err = r.Run(ctx)
83+
err = r.Run()
8484
if err != nil {
8585
log.Fatal(errors.Wrap(err, "error in run"))
8686
}

cmd/rolling.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ var rollingCmd = &cobra.Command{
8080
log.Fatal(err)
8181
}
8282

83-
err = r.Run(ctx)
83+
err = r.Run()
8484
if err != nil {
8585
log.Fatal(errors.Wrap(err, "error in run"))
8686
}

cmd/serial.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ var serialCmd = &cobra.Command{
8080
log.Fatal(err)
8181
}
8282

83-
err = r.Run(ctx)
83+
err = r.Run()
8484
if err != nil {
8585
log.Fatal(errors.Wrap(err, "error in run"))
8686
}

cmd/slow-canary.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ var slowCanaryCmd = &cobra.Command{
7979
log.Fatal(err)
8080
}
8181

82-
err = r.Run(ctx)
82+
err = r.Run()
8383
if err != nil {
8484
log.Fatal(errors.Wrap(err, "error in run"))
8585
}

full/runner.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ func asgSetWrapper(asg *bouncer.ASG) *bouncer.ASGSet {
100100
}
101101

102102
// Run has the meat of the batch job
103-
func (r *Runner) Run(ctx context.Context) error {
103+
func (r *Runner) Run() error {
104104
var newDesiredCapacity int32
105105

106-
ctx, cancel := r.NewContext(ctx)
106+
ctx, cancel := r.NewContext()
107107
defer cancel()
108108

109109
start:
@@ -132,8 +132,9 @@ start:
132132
return errors.Wrap(err, "failed to kill instance")
133133
}
134134

135-
ctx, cancel = r.ResetAndSleep(ctx)
135+
ctx, cancel = r.NewContext()
136136
defer cancel()
137+
r.Sleep(ctx)
137138

138139
continue start
139140
}
@@ -150,8 +151,9 @@ start:
150151
return errors.Wrap(err, "error setting desired capacity")
151152
}
152153

153-
ctx, cancel = r.ResetAndSleep(ctx)
154+
ctx, cancel = r.NewContext()
154155
defer cancel()
156+
r.Sleep(ctx)
155157

156158
continue start
157159
}

rolling/runner.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ func (r *Runner) ValidatePrereqs(ctx context.Context) error {
7979
}
8080

8181
// Run has the meat of the batch job
82-
func (r *Runner) Run(ctx context.Context) error {
83-
ctx, cancel := r.NewContext(ctx)
82+
func (r *Runner) Run() error {
83+
ctx, cancel := r.NewContext()
8484
defer cancel()
8585

8686
for {
@@ -104,8 +104,9 @@ func (r *Runner) Run(ctx context.Context) error {
104104
return errors.Wrap(err, "error finding or killing best old instance")
105105
}
106106

107-
ctx, cancel = r.ResetAndSleep(ctx)
107+
ctx, cancel = r.NewContext()
108108
defer cancel()
109+
r.Sleep(ctx)
109110

110111
continue
111112
}

0 commit comments

Comments
 (0)