Skip to content

Commit 585f02e

Browse files
committed
reverseproxy: prevent body close on dial-error retries (#7546)
cloneRequest shallow-copies Body, so clonedReq.Body and r.Body share the same io.ReadCloser. When Go's transport hits a dial error it calls req.Body.Close() before reading any bytes, which kills the original body for all subsequent retry attempts ("http: invalid Read on closed Body" → 502). Wrap the body in io.NopCloser when retries are configured so the transport's Close is a no-op. The real body is closed by the HTTP server when the handler returns. For already-buffered bodies (via request_buffers) the buffer is extracted into the same NopCloser path so both cases share a single code block. Unlike full eager buffering this adds zero memory or latency overhead — streaming is preserved and only the Close call is intercepted.
1 parent 7e83775 commit 585f02e

File tree

2 files changed

+281
-11
lines changed

2 files changed

+281
-11
lines changed
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
package reverseproxy
2+
3+
import (
4+
"errors"
5+
"io"
6+
"net"
7+
"net/http"
8+
"net/http/httptest"
9+
"strings"
10+
"sync"
11+
"testing"
12+
13+
"go.uber.org/zap"
14+
15+
"github.com/caddyserver/caddy/v2"
16+
"github.com/caddyserver/caddy/v2/modules/caddyhttp"
17+
)
18+
19+
// prepareTestRequest injects the context values that ServeHTTP and
20+
// proxyLoopIteration require (caddy.ReplacerCtxKey, VarsCtxKey, etc.) using
21+
// the same helper that the real HTTP server uses.
22+
//
23+
// A zero-value Server is passed so that caddyhttp.ServerCtxKey is set to a
24+
// non-nil pointer; reverseProxy dereferences it to check ShouldLogCredentials.
25+
func prepareTestRequest(req *http.Request) *http.Request {
26+
repl := caddy.NewReplacer()
27+
return caddyhttp.PrepareRequest(req, repl, nil, &caddyhttp.Server{})
28+
}
29+
30+
// closeOnCloseReader is an io.ReadCloser whose Close call genuinely disables
// further reads, imitating a real HTTP request body. (io.NopCloser's Close is
// a no-op and would hide the very bug these tests exist to catch.)
type closeOnCloseReader struct {
	mu     sync.Mutex
	src    *strings.Reader
	closed bool
}

// newCloseOnCloseReader wraps s in a close-aware reader positioned at byte 0.
func newCloseOnCloseReader(s string) *closeOnCloseReader {
	return &closeOnCloseReader{src: strings.NewReader(s)}
}

// Read yields bytes from the underlying string until Close has been called,
// after which it always fails with the same error text net/http produces for
// a read on a closed request body.
func (r *closeOnCloseReader) Read(p []byte) (int, error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.closed {
		return 0, errors.New("http: invalid Read on closed Body")
	}
	return r.src.Read(p)
}

// Close marks the reader closed; it never returns an error.
func (r *closeOnCloseReader) Close() error {
	r.mu.Lock()
	r.closed = true
	r.mu.Unlock()
	return nil
}
59+
60+
// deadUpstreamAddr reserves a local TCP address that is certain to refuse
// connections: it binds an ephemeral listener, captures its address, and
// closes the listener immediately. Any subsequent dial to the returned
// address gets ECONNREFUSED.
func deadUpstreamAddr(t *testing.T) string {
	t.Helper()
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("failed to create dead upstream listener: %v", err)
	}
	// Close before returning so the port is free (and refusing) by the time
	// the caller dials it.
	defer ln.Close()
	return ln.Addr().String()
}
73+
74+
// testTransport wraps http.Transport to:
75+
// 1. Set the URL scheme to "http" when it is empty (matching what
76+
// HTTPTransport.SetScheme does in production; cloneRequest strips the
77+
// scheme intentionally so a plain *http.Transport would fail with
78+
// "unsupported protocol scheme").
79+
// 2. Wrap dial errors as DialError so that tryAgain correctly identifies them
80+
// as safe-to-retry regardless of request method (as HTTPTransport does in
81+
// production via its custom dialer).
82+
type testTransport struct{ *http.Transport }
83+
84+
func (t testTransport) RoundTrip(req *http.Request) (*http.Response, error) {
85+
if req.URL.Scheme == "" {
86+
req.URL.Scheme = "http"
87+
}
88+
resp, err := t.Transport.RoundTrip(req)
89+
if err != nil {
90+
// Wrap dial errors as DialError to match production behaviour.
91+
// Without this wrapping, tryAgain treats ECONNREFUSED on a POST
92+
// request as non-retryable (only GET is retried by default when
93+
// the error is not a DialError).
94+
var opErr *net.OpError
95+
if errors.As(err, &opErr) && opErr.Op == "dial" {
96+
return nil, DialError{err}
97+
}
98+
}
99+
return resp, err
100+
}
101+
102+
// minimalHandler returns a Handler with only the fields required by ServeHTTP
103+
// set directly, bypassing Provision (which requires a full Caddy runtime).
104+
// RoundRobinSelection is used so that successive iterations of the proxy loop
105+
// advance through the upstream pool in a predictable order.
106+
func minimalHandler(retries int, upstreams ...*Upstream) *Handler {
107+
return &Handler{
108+
logger: zap.NewNop(),
109+
Transport: testTransport{&http.Transport{}},
110+
Upstreams: upstreams,
111+
LoadBalancing: &LoadBalancing{
112+
Retries: retries,
113+
SelectionPolicy: &RoundRobinSelection{},
114+
// RetryMatch intentionally nil: dial errors are always retried
115+
// regardless of RetryMatch or request method.
116+
},
117+
// ctx, connections, connectionsMu, events: zero/nil values are safe
118+
// for the code paths exercised by these tests (TryInterval=0 so
119+
// ctx.Done() is never consulted; no WebSocket hijacking; no passive
120+
// health-check event emission).
121+
}
122+
}
123+
124+
// TestDialErrorBodyRetry verifies that a POST request whose body has NOT been
// pre-buffered via request_buffers can still be retried after a dial error.
//
// Before the fix, a dial error caused Go's transport to close the shared body
// (via cloneRequest's shallow copy), so the retry attempt would read from an
// already-closed io.ReadCloser and produce:
//
//	http: invalid Read on closed Body → HTTP 502
//
// After the fix the handler wraps the body in a NopCloser when retries are
// configured, preventing the transport's Close() from propagating to the
// shared body. Since dial errors never read any bytes, the body remains at
// position 0 for the retry.
func TestDialErrorBodyRetry(t *testing.T) {
	// Good upstream: echoes the request body back with 200 OK, so a
	// successful retry can be verified end to end.
	goodServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		body, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, "read body: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write(body)
	}))
	t.Cleanup(goodServer.Close)

	const requestBody = "hello, retry"

	tests := []struct {
		name       string
		method     string
		body       string
		retries    int
		wantStatus int
		wantBody   string
	}{
		{
			// Core regression case: POST with a body, no request_buffers,
			// dial error on first upstream → retry to second upstream succeeds.
			name:       "POST body retried after dial error",
			method:     http.MethodPost,
			body:       requestBody,
			retries:    1,
			wantStatus: http.StatusOK,
			wantBody:   requestBody,
		},
		{
			// Dial errors are always retried regardless of method, but there
			// is no body to re-read, so GET has always worked. Keep it as a
			// sanity check that we did not break the no-body path.
			name:       "GET without body retried after dial error",
			method:     http.MethodGet,
			body:       "",
			retries:    1,
			wantStatus: http.StatusOK,
			wantBody:   "",
		},
		{
			// Without any retry configuration the handler must give up on the
			// first dial error and return a 502. Confirms no wrapping occurs
			// in the no-retry path.
			name:       "no retries configured returns 502 on dial error",
			method:     http.MethodPost,
			body:       requestBody,
			retries:    0,
			wantStatus: http.StatusBadGateway,
			wantBody:   "",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			dead := deadUpstreamAddr(t)

			// Build the upstream pool. RoundRobinSelection starts its
			// counter at 0 and increments before returning, so with a
			// two-element pool it picks index 1 first, then index 0.
			// Put the good upstream at index 0 and the dead one at
			// index 1 so that:
			//	attempt 1 → pool[1] = dead → DialError (ECONNREFUSED)
			//	attempt 2 → pool[0] = good → 200
			upstreams := []*Upstream{
				{Host: new(Host), Dial: goodServer.Listener.Addr().String()},
				{Host: new(Host), Dial: dead},
			}
			if tc.retries == 0 {
				// For the "no retries" case use only the dead upstream so
				// there is nowhere to retry to.
				upstreams = []*Upstream{
					{Host: new(Host), Dial: dead},
				}
			}

			h := minimalHandler(tc.retries, upstreams...)

			// Use closeOnCloseReader so that Close() truly prevents further
			// reads, matching real http.body semantics. io.NopCloser would
			// mask the bug because its Close is a no-op.
			var bodyReader io.ReadCloser
			if tc.body != "" {
				bodyReader = newCloseOnCloseReader(tc.body)
			}
			req := httptest.NewRequest(tc.method, "http://example.com/", bodyReader)
			if bodyReader != nil {
				// httptest.NewRequest wraps the reader in NopCloser; replace
				// it with our close-aware reader so Close() is propagated.
				req.Body = bodyReader
				req.ContentLength = int64(len(tc.body))
			}
			req = prepareTestRequest(req)

			rec := httptest.NewRecorder()
			err := h.ServeHTTP(rec, req, caddyhttp.HandlerFunc(func(w http.ResponseWriter, r *http.Request) error {
				return nil
			}))

			// For error cases (e.g. 502) ServeHTTP returns a HandlerError
			// rather than writing the status itself, so consult both the
			// recorder and the returned error.
			gotStatus := rec.Code
			if err != nil {
				if herr, ok := err.(caddyhttp.HandlerError); ok {
					gotStatus = herr.StatusCode
				}
			}

			if gotStatus != tc.wantStatus {
				t.Errorf("status: got %d, want %d (err=%v)", gotStatus, tc.wantStatus, err)
			}
			if tc.wantBody != "" && rec.Body.String() != tc.wantBody {
				t.Errorf("body: got %q, want %q", rec.Body.String(), tc.wantBody)
			}
		})
	}
}

modules/caddyhttp/reverseproxy/reverseproxy.go

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -482,18 +482,31 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyht
482482
reqHost := clonedReq.Host
483483
reqHeader := clonedReq.Header
484484

485-
// If the cloned request body was fully buffered, keep a reference to its
486-
// buffer so we can reuse it across retries and return it to the pool
487-
// once we’re done.
485+
// When retries are configured and there is a body, wrap it in
486+
// io.NopCloser to prevent Go's transport from closing it on dial
487+
// errors. cloneRequest does a shallow copy, so clonedReq.Body and
488+
// r.Body share the same io.ReadCloser — a dial-failure Close()
489+
// would kill the original body for all subsequent retry attempts.
490+
// The real body is closed by the HTTP server when the handler
491+
// returns.
492+
//
493+
// If the body was already fully buffered (via request_buffers),
494+
// we also extract the buffer so the retry loop can replay it
495+
// from the beginning on each attempt. (see #6259, #7546)
488496
var bufferedReqBody *bytes.Buffer
489-
if reqBodyBuf, ok := clonedReq.Body.(bodyReadCloser); ok && reqBodyBuf.body == nil && reqBodyBuf.buf != nil {
490-
bufferedReqBody = reqBodyBuf.buf
491-
reqBodyBuf.buf = nil
492-
493-
defer func() {
494-
bufferedReqBody.Reset()
495-
bufPool.Put(bufferedReqBody)
496-
}()
497+
if clonedReq.Body != nil && h.LoadBalancing != nil &&
498+
(h.LoadBalancing.Retries > 0 || h.LoadBalancing.TryDuration > 0) {
499+
if reqBodyBuf, ok := clonedReq.Body.(bodyReadCloser); ok && reqBodyBuf.body == nil && reqBodyBuf.buf != nil {
500+
bufferedReqBody = reqBodyBuf.buf
501+
reqBodyBuf.buf = nil
502+
clonedReq.Body = io.NopCloser(bytes.NewReader(bufferedReqBody.Bytes()))
503+
defer func() {
504+
bufferedReqBody.Reset()
505+
bufPool.Put(bufferedReqBody)
506+
}()
507+
} else {
508+
clonedReq.Body = io.NopCloser(clonedReq.Body)
509+
}
497510
}
498511

499512
start := time.Now()

0 commit comments

Comments
 (0)