Skip to content

Commit e5f377d

Browse files
neganovalexey authored and jeevatkm committed
change Backoff() algorithm (#237)
1) Add capabilities to handle Retry-After headers and similar info from the server. Motivation: some servers provide a Retry-After header or similar info along with a 429 or 503 status code, and it is often important to honor such information on retries, i.e. simple exponential backoff is not optimal. https://docs.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online 2) Add an option NOT to retry even if the operation returns an error (but retry by default, if no retry conditions are set). Motivation: errors are already passed to the condition callback in resty, but Backoff() still retries the request if the error is not nil. This implies excessive, stillborn retries for non-retryable errors from the underlying http client (e.g. with the RoundTripper from oauth2). 3) Remove the error return value from the condition callback. Motivation: this error is neither passed to the caller, nor logged in any way. It is cleaner to have "needRetry == true" than "needRetry == true || conditionErr != nil". 4) Do not use floating-point arithmetic for exponential backoff. Motivation: simplification & performance
1 parent 46fc51a commit e5f377d

File tree

4 files changed

+376
-71
lines changed

4 files changed

+376
-71
lines changed

client.go

+8
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ type Client struct {
8989
RetryWaitTime time.Duration
9090
RetryMaxWaitTime time.Duration
9191
RetryConditions []RetryConditionFunc
92+
RetryAfter RetryAfterFunc
9293
JSONMarshal func(v interface{}) ([]byte, error)
9394
JSONUnmarshal func(data []byte, v interface{}) error
9495

@@ -515,6 +516,13 @@ func (c *Client) SetRetryMaxWaitTime(maxWaitTime time.Duration) *Client {
515516
return c
516517
}
517518

519+
// SetRetryAfter sets callback to calculate wait time between retries.
520+
// Default (nil) implies exponential backoff with jitter
521+
func (c *Client) SetRetryAfter(callback RetryAfterFunc) *Client {
522+
c.RetryAfter = callback
523+
return c
524+
}
525+
518526
// AddRetryCondition method adds a retry condition function to array of functions
519527
// that are checked to determine if the request is retried. The request will
520528
// retry if any of the functions return true and error is nil.

request.go

+1-6
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ func (r *Request) Execute(method, url string) (*Response, error) {
566566

567567
var resp *Response
568568
attempt := 0
569-
_ = Backoff(
569+
err = Backoff(
570570
func() (*Response, error) {
571571
attempt++
572572

@@ -575,11 +575,6 @@ func (r *Request) Execute(method, url string) (*Response, error) {
575575
resp, err = r.client.execute(r)
576576
if err != nil {
577577
r.client.log.Errorf("%v, Attempt %v", err, attempt)
578-
if r.ctx != nil && r.ctx.Err() != nil {
579-
// stop Backoff from retrying request if request has been
580-
// canceled by context
581-
return resp, nil
582-
}
583578
}
584579

585580
return resp, err

retry.go

+75-24
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,16 @@ type (
2121
Option func(*Options)
2222

2323
// RetryConditionFunc type is for retry condition function
24-
RetryConditionFunc func(*Response, error) (bool, error)
24+
// input: non-nil Response OR request execution error
25+
RetryConditionFunc func(*Response, error) bool
26+
27+
// RetryAfterFunc returns time to wait before retry
28+
// For example, it can parse HTTP Retry-After header
29+
// https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
30+
// Non-nil error is returned if it is found that request is not retryable
31+
// (0, nil) is a special result meaning 'use default algorithm'
32+
RetryAfterFunc func(*Client, *Response) (time.Duration, error)
2533

26-
// Options to hold Resty retry values.
2734
Options struct {
2835
maxRetries int
2936
waitTime time.Duration
@@ -79,40 +86,84 @@ func Backoff(operation func() (*Response, error), options ...Option) error {
7986
resp *Response
8087
err error
8188
)
82-
base := float64(opts.waitTime) // Time to wait between each attempt
83-
capLevel := float64(opts.maxWaitTime) // Maximum amount of wait time for the retry
89+
8490
for attempt := 0; attempt < opts.maxRetries; attempt++ {
8591
resp, err = operation()
92+
if resp != nil && resp.Request.ctx != nil && resp.Request.ctx.Err() != nil {
93+
return err
94+
}
95+
96+
needsRetry := err != nil // retry on operation errors by default
8697

87-
var needsRetry bool
88-
var conditionErr error
8998
for _, condition := range opts.retryConditions {
90-
needsRetry, conditionErr = condition(resp, err)
91-
if needsRetry || conditionErr != nil {
99+
needsRetry = condition(resp, err)
100+
if needsRetry {
92101
break
93102
}
94103
}
95104

96-
// If the operation returned no error, there was no condition satisfied and
97-
// there was no error caused by the conditional functions.
98-
if err == nil && !needsRetry && conditionErr == nil {
99-
return nil
100-
}
101-
// Adding capped exponential backup with jitter
102-
// See the following article...
103-
// http://www.awsarchitectureblog.com/2015/03/backoff.html
104-
temp := math.Min(capLevel, base*math.Exp2(float64(attempt)))
105-
ri := int(temp / 2)
106-
if ri <= 0 {
107-
ri = 1<<31 - 1 // max int for arch 386
105+
if !needsRetry {
106+
return err
108107
}
109-
sleepDuration := time.Duration(math.Abs(float64(ri + rand.Intn(ri))))
110108

111-
if sleepDuration < opts.waitTime {
112-
sleepDuration = opts.waitTime
109+
waitTime, err2 := sleepDuration(resp, opts.waitTime, opts.maxWaitTime, attempt)
110+
if err2 != nil {
111+
if err == nil {
112+
err = err2
113+
}
114+
return err
113115
}
114-
time.Sleep(sleepDuration)
116+
time.Sleep(waitTime)
115117
}
116118

117119
return err
118120
}
121+
122+
func sleepDuration(resp *Response, min, max time.Duration, attempt int) (time.Duration, error) {
123+
const maxInt = 1<<31 -1 // max int for arch 386
124+
125+
if max < 0 {
126+
max = maxInt
127+
}
128+
129+
if resp == nil {
130+
goto defaultCase
131+
}
132+
133+
// 1. Check for custom callback
134+
if retryAfterFunc := resp.Request.client.RetryAfter; retryAfterFunc != nil {
135+
result, err := retryAfterFunc(resp.Request.client, resp)
136+
if err != nil {
137+
return 0, err // i.e. 'API quota exceeded'
138+
}
139+
if result == 0 {
140+
goto defaultCase
141+
}
142+
if result < 0 || max < result {
143+
result = max
144+
}
145+
if result < min {
146+
result = min
147+
}
148+
return result, nil
149+
}
150+
151+
// 2. Return capped exponential backoff with jitter
152+
// http://www.awsarchitectureblog.com/2015/03/backoff.html
153+
defaultCase:
154+
base := float64(min)
155+
capLevel := float64(max)
156+
157+
temp := math.Min(capLevel, base*math.Exp2(float64(attempt)))
158+
ri := int(temp / 2)
159+
if ri <= 0 {
160+
ri = maxInt // max int for arch 386
161+
}
162+
result := time.Duration(math.Abs(float64(ri + rand.Intn(ri))))
163+
164+
if result < min {
165+
result = min
166+
}
167+
168+
return result, nil
169+
}

0 commit comments

Comments
 (0)