Skip to content

Commit e3c369d

Browse files
authored
logging: Implement dial timeout for net writer (fix #4083) (#4172)
* logging: Implement dial timeout for net writer (fix #4083) * Limit how often redials are attempted This should cause dial blocking to occur only once every 10 seconds at most, but it also means the logger connection might be down for up to 10 seconds after it comes back online; oh well. We shouldn't block for DialTimeout at every single log emission. * Clarify offline behavior
1 parent c052162 commit e3c369d

File tree

1 file changed

+61
-14
lines changed

1 file changed

+61
-14
lines changed

modules/logging/netwriter.go

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ import (
1818
"fmt"
1919
"io"
2020
"net"
21+
"os"
2122
"sync"
23+
"time"
2224

2325
"github.com/caddyserver/caddy/v2"
2426
"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
@@ -28,10 +30,16 @@ func init() {
2830
caddy.RegisterModule(NetWriter{})
2931
}
3032

31-
// NetWriter implements a log writer that outputs to a network socket.
33+
// NetWriter implements a log writer that outputs to a network socket. If
34+
// the socket goes down, it will dump logs to stderr while it attempts to
35+
// reconnect.
3236
type NetWriter struct {
37+
// The address of the network socket to which to connect.
3338
Address string `json:"address,omitempty"`
3439

40+
// The timeout to wait while connecting to the socket.
41+
DialTimeout caddy.Duration `json:"dial_timeout,omitempty"`
42+
3543
addr caddy.NetworkAddress
3644
}
3745

@@ -60,6 +68,10 @@ func (nw *NetWriter) Provision(ctx caddy.Context) error {
6068
return fmt.Errorf("multiple ports not supported")
6169
}
6270

71+
if nw.DialTimeout < 0 {
72+
return fmt.Errorf("timeout cannot be less than 0")
73+
}
74+
6375
return nil
6476
}
6577

@@ -74,7 +86,10 @@ func (nw NetWriter) WriterKey() string {
7486

7587
// OpenWriter opens a new network connection.
7688
func (nw NetWriter) OpenWriter() (io.WriteCloser, error) {
77-
reconn := &redialerConn{nw: nw}
89+
reconn := &redialerConn{
90+
nw: nw,
91+
timeout: time.Duration(nw.DialTimeout),
92+
}
7893
conn, err := reconn.dial()
7994
if err != nil {
8095
return nil, err
@@ -87,7 +102,9 @@ func (nw NetWriter) OpenWriter() (io.WriteCloser, error) {
87102

88103
// UnmarshalCaddyfile sets up the handler from Caddyfile tokens. Syntax:
89104
//
90-
// net <address>
105+
// net <address> {
106+
// dial_timeout <duration>
107+
// }
91108
//
92109
func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
93110
for d.Next() {
@@ -98,6 +115,22 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
98115
if d.NextArg() {
99116
return d.ArgErr()
100117
}
118+
for nesting := d.Nesting(); d.NextBlock(nesting); {
119+
switch d.Val() {
120+
case "dial_timeout":
121+
if !d.NextArg() {
122+
return d.ArgErr()
123+
}
124+
timeout, err := caddy.ParseDuration(d.Val())
125+
if err != nil {
126+
return d.Errf("invalid duration: %s", d.Val())
127+
}
128+
if d.NextArg() {
129+
return d.ArgErr()
130+
}
131+
nw.DialTimeout = caddy.Duration(timeout)
132+
}
133+
}
101134
}
102135
return nil
103136
}
@@ -107,8 +140,10 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
107140
// is retried.
108141
type redialerConn struct {
109142
net.Conn
110-
connMu sync.RWMutex
111-
nw NetWriter
143+
connMu sync.RWMutex
144+
nw NetWriter
145+
timeout time.Duration
146+
lastRedial time.Time
112147
}
113148

114149
// Write wraps the underlying Conn.Write method, but if that fails,
@@ -131,20 +166,32 @@ func (reconn *redialerConn) Write(b []byte) (n int, err error) {
131166
return
132167
}
133168

134-
// we're the lucky first goroutine to re-dial the connection
135-
conn2, err2 := reconn.dial()
136-
if err2 != nil {
137-
return
138-
}
139-
if n, err = conn2.Write(b); err == nil {
140-
reconn.Conn.Close()
141-
reconn.Conn = conn2
169+
// there's still a problem, so try to re-attempt dialing the socket
170+
// if some time has passed in which the issue could have potentially
171+
// been resolved - we don't want to block at every single log
172+
// emission (!) - see discussion in #4111
173+
if time.Since(reconn.lastRedial) > 10*time.Second {
174+
reconn.lastRedial = time.Now()
175+
conn2, err2 := reconn.dial()
176+
if err2 != nil {
177+
// logger socket still offline; instead of discarding the log, dump it to stderr
178+
os.Stderr.Write(b)
179+
return
180+
}
181+
if n, err = conn2.Write(b); err == nil {
182+
reconn.Conn.Close()
183+
reconn.Conn = conn2
184+
}
185+
} else {
186+
// last redial attempt was too recent; just dump to stderr for now
187+
os.Stderr.Write(b)
142188
}
189+
143190
return
144191
}
145192

146193
func (reconn *redialerConn) dial() (net.Conn, error) {
147-
return net.Dial(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0))
194+
return net.DialTimeout(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0), reconn.timeout)
148195
}
149196

150197
// Interface guards

0 commit comments

Comments
 (0)