Skip to content

Commit 5794d85

Browse files
authored
chore: add retry logic to docker crane (#5657)
1 parent dc7f84d commit 5794d85

File tree

5 files changed

+412
-10
lines changed

5 files changed

+412
-10
lines changed

cmd/imagePushToRegistry.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,15 @@ type imagePushToRegistryUtilsBundle struct {
5050
// imagePushToRegistryUtilsBundle and forward to the implementation of the dependency.
5151
}
5252

53-
func newImagePushToRegistryUtils() imagePushToRegistryUtils {
53+
func newImagePushToRegistryUtils(disableHTTP2 bool) imagePushToRegistryUtils {
54+
craneBundle := docker.NewCraneUtilsBundle()
55+
craneBundle.DisableHTTP2 = disableHTTP2
5456
utils := imagePushToRegistryUtilsBundle{
5557
Command: &command.Command{
5658
StepName: "imagePushToRegistry",
5759
},
5860
Files: &piperutils.Files{},
59-
dockerImageUtils: &docker.CraneUtilsBundle{},
61+
dockerImageUtils: craneBundle,
6062
}
6163
// Reroute command output to logging framework
6264
utils.Stdout(log.Writer())
@@ -67,7 +69,7 @@ func newImagePushToRegistryUtils() imagePushToRegistryUtils {
6769
func imagePushToRegistry(config imagePushToRegistryOptions, telemetryData *telemetry.CustomData) {
6870
// Utils can be used wherever the command.ExecRunner interface is expected.
6971
// It can also be used for example as a mavenExecRunner.
70-
utils := newImagePushToRegistryUtils()
72+
utils := newImagePushToRegistryUtils(config.DisableHTTP2)
7173

7274
// For HTTP calls import piperhttp "github.com/SAP/jenkins-library/pkg/http"
7375
// and use a &piperhttp.Client{} in a custom system

cmd/imagePushToRegistry_generated.go

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/docker/crane.go

Lines changed: 161 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,185 @@ package docker
22

33
import (
44
"context"
5+
"crypto/tls"
6+
"errors"
7+
"net"
8+
"net/http"
9+
"strings"
10+
"time"
511

612
"github.com/google/go-containerregistry/pkg/crane"
713
v1 "github.com/google/go-containerregistry/pkg/v1"
14+
"github.com/google/go-containerregistry/pkg/v1/remote/transport"
15+
16+
"github.com/SAP/jenkins-library/pkg/log"
17+
)
18+
19+
// Default retry policy for transient network errors. retryOperation falls back
// to these values whenever the matching craneUtilsBundle field is zero or
// negative.
const (
	// defaultMaxRetries is the total number of attempts, the first one included.
	defaultMaxRetries = 3
	// defaultInitialBackoff is the wait between the first and second attempt.
	defaultInitialBackoff = 5 * time.Second
	// defaultMaxBackoff is the upper bound applied to the wait after it has
	// been multiplied by the backoff factor.
	defaultMaxBackoff = time.Minute
	// defaultBackoffFactor is the multiplier applied to the wait after each
	// failed attempt.
	defaultBackoffFactor = 2.0
)
926

10-
type CraneUtilsBundle struct{}
27+
// craneUtilsBundle bundles the crane-based image operations together with the
// retry and transport settings they run with.
type craneUtilsBundle struct {
	// MaxRetries is the total number of attempts per operation; values <= 0
	// make retryOperation use the package default.
	MaxRetries int
	// InitialBackoff is the wait before the second attempt; values <= 0 make
	// retryOperation use the package default.
	InitialBackoff time.Duration
	// BackoffFactor multiplies the wait after every failed attempt; values
	// <= 0 make retryOperation use the package default.
	BackoffFactor float64
	// DisableHTTP2 turns off the HTTP/2 attempt on the transport built for
	// registry calls.
	DisableHTTP2 bool
}
33+
34+
// NewCraneUtilsBundle creates a new craneUtilsBundle with default retry settings.
35+
func NewCraneUtilsBundle() *craneUtilsBundle {
36+
return &craneUtilsBundle{
37+
MaxRetries: defaultMaxRetries,
38+
InitialBackoff: defaultInitialBackoff,
39+
BackoffFactor: defaultBackoffFactor,
40+
DisableHTTP2: false,
41+
}
42+
}
43+
44+
// newHTTPTransport builds the HTTP transport used for registry traffic. It is
// tuned for large file transfers: connection setup and TLS handshake are
// bounded, while waiting for response headers is not.
//
// disableHTTP2 switches off the HTTP/2 attempt (ForceAttemptHTTP2 is set to its
// negation).
func newHTTPTransport(disableHTTP2 bool) *http.Transport {
	dialer := &net.Dialer{
		Timeout:   30 * time.Second,
		KeepAlive: 30 * time.Second,
	}
	tlsConfig := &tls.Config{MinVersion: tls.VersionTLS12}

	tr := &http.Transport{}
	tr.Proxy = http.ProxyFromEnvironment
	tr.DialContext = dialer.DialContext
	tr.ForceAttemptHTTP2 = !disableHTTP2

	// Connection pool sizing.
	tr.MaxIdleConns = 100
	tr.MaxIdleConnsPerHost = 10
	tr.IdleConnTimeout = 90 * time.Second

	// Handshake / protocol timeouts; zero ResponseHeaderTimeout means no limit.
	tr.TLSHandshakeTimeout = 10 * time.Second
	tr.ExpectContinueTimeout = 1 * time.Second
	tr.ResponseHeaderTimeout = 0

	tr.TLSClientConfig = tlsConfig
	return tr
}
64+
65+
// isRetryableError checks if the error is transient and should be retried
66+
func isRetryableError(err error) bool {
67+
if err == nil {
68+
return false
69+
}
70+
71+
// Check for registry transport errors (e.g., 5xx status codes)
72+
// TODO: go 1.26 supports AsType
73+
var transportErr *transport.Error
74+
if errors.As(err, &transportErr) && transportErr.Temporary() {
75+
return true
76+
}
77+
78+
// Fallback to string matching for errors not wrapped as transport.Error
79+
// (e.g., HTTP/2 stream errors which are plain string errors)
80+
errMsg := err.Error()
81+
switch {
82+
case strings.Contains(errMsg, "stream error"), // HTTP/2 stream errors
83+
strings.Contains(errMsg, "connection reset"), // Connection reset errors
84+
strings.Contains(errMsg, "unexpected EOF"), // EOF during transfer
85+
strings.Contains(errMsg, "timeout"), // Timeout errors
86+
strings.Contains(errMsg, "Timeout"),
87+
strings.Contains(errMsg, "network"), // Network errors
88+
strings.Contains(errMsg, "connection refused"):
89+
return true
90+
default:
91+
return false
92+
}
93+
}
94+
95+
// retryOperation executes an operation with exponential backoff retry logic
96+
func (c *craneUtilsBundle) retryOperation(ctx context.Context, operation string, task func() error) error {
97+
maxRetries := c.MaxRetries
98+
if maxRetries <= 0 {
99+
maxRetries = defaultMaxRetries
100+
}
101+
backoff := c.InitialBackoff
102+
if backoff <= 0 {
103+
backoff = defaultInitialBackoff
104+
}
105+
factor := c.BackoffFactor
106+
if factor <= 0 {
107+
factor = defaultBackoffFactor
108+
}
109+
110+
var lastErr error
111+
for attempt := 1; attempt <= maxRetries; attempt++ {
112+
lastErr = task()
113+
if lastErr == nil {
114+
return nil
115+
}
116+
117+
if !isRetryableError(lastErr) {
118+
log.Entry().Debugf("%s: non-retryable error: %v", operation, lastErr)
119+
return lastErr
120+
}
121+
122+
if attempt >= maxRetries {
123+
log.Entry().Warnf("%s: all %d attempts failed, last error: %v", operation, maxRetries, lastErr)
124+
return lastErr
125+
}
126+
127+
log.Entry().Warnf("%s: attempt %d/%d failed with retryable error: %v, retrying in %v...",
128+
operation, attempt, maxRetries, lastErr, backoff)
129+
130+
select {
131+
case <-ctx.Done():
132+
return ctx.Err()
133+
case <-time.After(backoff):
134+
}
135+
136+
backoff = time.Duration(float64(backoff) * factor)
137+
if backoff > defaultMaxBackoff {
138+
backoff = defaultMaxBackoff
139+
}
140+
}
141+
return lastErr
142+
}
143+
144+
// getCraneOptions returns common crane options with custom transport
145+
func (c *craneUtilsBundle) getCraneOptions(ctx context.Context, platform *v1.Platform) []crane.Option {
146+
opts := []crane.Option{
147+
crane.WithContext(ctx),
148+
crane.WithTransport(newHTTPTransport(c.DisableHTTP2)),
149+
}
150+
if platform != nil {
151+
opts = append(opts, crane.WithPlatform(platform))
152+
}
153+
return opts
154+
}
11155

12-
func (c *CraneUtilsBundle) CopyImage(ctx context.Context, src, dest, platform string) error {
156+
func (c *craneUtilsBundle) CopyImage(ctx context.Context, src, dest, platform string) error {
13157
p, err := parsePlatform(platform)
14158
if err != nil {
15159
return err
16160
}
17-
return crane.Copy(src, dest, crane.WithContext(ctx), crane.WithPlatform(p))
161+
return c.retryOperation(ctx, "CopyImage", func() error {
162+
return crane.Copy(src, dest, c.getCraneOptions(ctx, p)...)
163+
})
18164
}
19165

20-
func (c *CraneUtilsBundle) PushImage(ctx context.Context, im v1.Image, dest, platform string) error {
166+
func (c *craneUtilsBundle) PushImage(ctx context.Context, im v1.Image, dest, platform string) error {
21167
p, err := parsePlatform(platform)
22168
if err != nil {
23169
return err
24170
}
25-
return crane.Push(im, dest, crane.WithContext(ctx), crane.WithPlatform(p))
171+
return c.retryOperation(ctx, "PushImage", func() error {
172+
return crane.Push(im, dest, c.getCraneOptions(ctx, p)...)
173+
})
26174
}
27175

28-
func (c *CraneUtilsBundle) LoadImage(ctx context.Context, src string) (v1.Image, error) {
29-
return crane.Load(src, crane.WithContext(ctx))
176+
func (c *craneUtilsBundle) LoadImage(ctx context.Context, src string) (v1.Image, error) {
177+
var img v1.Image
178+
err := c.retryOperation(ctx, "LoadImage", func() error {
179+
var loadErr error
180+
img, loadErr = crane.Load(src, crane.WithContext(ctx))
181+
return loadErr
182+
})
183+
return img, err
30184
}
31185

32186
// parsePlatform is a wrapper for v1.ParsePlatform. It is necessary because

0 commit comments

Comments
 (0)