Skip to content

Commit d6ba755

Browse files
committed
NetlinkRequest: retry on ErrDumpInterrupted in NetlinkRequest.Execute
The addition of ErrDumpInterrupted forced all callers to deal explicitly with interrupted dumps, even though the caller should typically just retry. This commit adds an opt-in retry mechanism: when enabled by calling Handle.RetryInterrupted, NetlinkRequest.Execute attempts the request up to 10 times in total before finally giving up with ErrDumpInterrupted. This should lead to fewer surprised users. Signed-off-by: Timo Beckers <timo@incline.eu>
1 parent ddba687 commit d6ba755

File tree

2 files changed

+49
-10
lines changed

2 files changed

+49
-10
lines changed

handle_linux.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ import (
1313
var pkgHandle = &Handle{}
1414

1515
type HandleOptions struct {
16-
lookupByDump bool
17-
collectVFInfo bool
16+
lookupByDump bool
17+
collectVFInfo bool
18+
// retryInterrupted is switched on by Handle.RetryInterrupted and is copied
// into the RetryInterrupted field of every request that newNetlinkRequest
// builds.
retryInterrupted bool
1819
}
1920

2021
// Handle is a handle for the netlink requests on a
@@ -32,6 +33,14 @@ func (h *Handle) DisableVFInfoCollection() *Handle {
3233
return h
3334
}
3435

36+
// RetryInterrupted configures the Handle to automatically retry dump operations
37+
// a number of times if they fail with EINTR before finally returning
38+
// [ErrDumpInterrupted].
//
// The method only sets the retryInterrupted option on h; newNetlinkRequest
// copies that option into each request it builds. It returns the same Handle
// it was called on.
39+
func (h *Handle) RetryInterrupted() *Handle {
40+
h.options.retryInterrupted = true
41+
return h
42+
}
43+
3544
// SetSocketTimeout configures timeout for default netlink sockets
3645
func SetSocketTimeout(to time.Duration) error {
3746
if to < time.Microsecond {
@@ -197,5 +206,7 @@ func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
197206
Flags: unix.NLM_F_REQUEST | uint16(flags),
198207
},
199208
Sockets: h.sockets,
209+
210+
RetryInterrupted: h.options.retryInterrupted,
200211
}
201212
}

nl/nl_linux.go

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,8 @@ type NetlinkRequest struct {
465465
Data []NetlinkRequestData
466466
RawData []byte
467467
Sockets map[int]*SocketHandle
468+
469+
RetryInterrupted bool
468470
}
469471

470472
// Serialize the Netlink Request into a byte array
@@ -510,15 +512,41 @@ func (req *NetlinkRequest) AddRawData(data []byte) {
510512
// If the returned error is [ErrDumpInterrupted], results may be inconsistent
511513
// or incomplete.
512514
func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
513-
var res [][]byte
514-
err := req.ExecuteIter(sockType, resType, func(msg []byte) bool {
515-
res = append(res, msg)
516-
return true
517-
})
518-
if err != nil && !errors.Is(err, ErrDumpInterrupted) {
519-
return nil, err
515+
// attempts bounds the total number of ExecuteIter calls, the first try
// included; it is not the number of retries.
const attempts = 10
516+
var (
517+
err error
518+
// lastRes keeps the messages of the most recent interrupted attempt only;
// results of earlier attempts are discarded.
lastRes [][]byte
519+
)
520+
for range attempts {
521+
// Declared per iteration so that a retry starts from an empty slice
// instead of appending to the messages of an interrupted attempt.
var res [][]byte
522+
err = req.ExecuteIter(sockType, resType, func(msg []byte) bool {
523+
res = append(res, msg)
524+
return true
525+
})
526+
if errors.Is(err, ErrDumpInterrupted) {
527+
// Hang on to the last result, callers can use partial results even if the
528+
// dump was interrupted.
529+
lastRes = res
530+
531+
// Only retry if the Request is configured to do so for backwards
532+
// compatibility.
533+
if req.RetryInterrupted {
534+
continue
535+
}
536+
}
537+
if err == nil {
538+
return res, nil
539+
}
540+
// Reached for any error that is not retried: a non-interrupt error, or an
// interrupted dump while RetryInterrupted is unset.
break
520541
}
521-
return res, err
542+
543+
if errors.Is(err, ErrDumpInterrupted) {
544+
// The sentinel is wrapped with %w, so callers have to match it with
// errors.Is rather than a direct == comparison.
// NOTE(review): the message always prints the attempts constant (10), even
// when RetryInterrupted is false and the loop ran exactly once; consider
// reporting the number of attempts actually made.
return lastRes, fmt.Errorf("execute netlink request (%d attempts): %w", attempts, err)
545+
}
546+
547+
// Do not wrap the error from ExecuteIter. It gets type-asserted and replaced
548+
// with sentinels in some callers, and wrapping breaks that.
549+
return nil, err
522550
}
523551

524552
// ExecuteIter executes the request against the given sockType.

0 commit comments

Comments
 (0)