Skip to content

Commit f6c8442

Browse files
authored
feat(os, peermem): ignore lsmod, system virt cmd timeouts (as non-actionable) (#531)
1 parent 5a7b98c commit f6c8442

File tree

3 files changed: +17 -11 lines changed

components/os/component_output.go

+8-3
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package os
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"runtime"
78
"sync"
@@ -266,7 +267,7 @@ func setDefaultPoller(cfg Config, eventBucket eventstore.Bucket) {
266267
defaultPoller = query.New(
267268
os_id.Name,
268269
cfg.Query,
269-
createGet(cfg, eventBucket),
270+
createGet(eventBucket),
270271
nil,
271272
)
272273
})
@@ -278,15 +279,19 @@ func getDefaultPoller() query.Poller {
278279

279280
var getSystemdDetectVirtFunc = pkg_host.SystemdDetectVirt
280281

281-
func createGet(cfg Config, eventBucket eventstore.Bucket) func(ctx context.Context) (_ any, e error) {
282+
func createGet(eventBucket eventstore.Bucket) func(ctx context.Context) (_ any, e error) {
282283
return func(ctx context.Context) (_ any, e error) {
283284
o := &Output{}
284285

285286
cctx, ccancel := context.WithTimeout(ctx, 15*time.Second)
286287
virtEnv, err := getSystemdDetectVirtFunc(cctx)
287288
ccancel()
288289
if err != nil {
289-
return nil, fmt.Errorf("failed to get virtualization environment using 'systemd-detect-virt': %w", err)
290+
// ignore "context.DeadlineExceeded" since it's not a critical error and it's non-actionable
291+
if !errors.Is(err, context.DeadlineExceeded) {
292+
return nil, fmt.Errorf("failed to get virtualization environment using 'systemd-detect-virt': %w", err)
293+
}
294+
log.Logger.Warnw("failed to get virtualization environment using 'systemd-detect-virt'", "error", err)
290295
}
291296
o.VirtualizationEnvironment = virtEnv
292297

components/os/component_output_test.go

+3-7
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,9 @@ func TestGet(t *testing.T) {
1818
ctx, cancel := context.WithTimeout(context.Background(), 0)
1919
defer cancel()
2020

21-
getFunc := createGet(Config{}, nil)
21+
getFunc := createGet(nil)
2222
_, err := getFunc(ctx)
23-
if err == nil {
24-
t.Fatalf("expected error, got nil")
25-
}
26-
expectedError := "failed to get virtualization environment using 'systemd-detect-virt': context deadline exceeded"
27-
if err.Error() != expectedError {
28-
t.Fatalf("expected error: %s, got: %s", expectedError, err.Error())
23+
if err != nil {
24+
t.Fatalf("expected nil")
2925
}
3026
}

pkg/nvidia-query/query.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,12 @@ func Get(ctx context.Context, opts ...OpOption) (output any, err error) {
134134
o.LsmodPeermem, err = peermem.CheckLsmodPeermemModule(cctx)
135135
ccancel()
136136
if err != nil {
137-
o.LsmodPeermemErrors = append(o.LsmodPeermemErrors, err.Error())
137+
// ignore "context.DeadlineExceeded" since it's not a critical error and it's non-actionable
138+
if !errors.Is(err, context.DeadlineExceeded) {
139+
o.LsmodPeermemErrors = append(o.LsmodPeermemErrors, err.Error())
140+
} else {
141+
log.Logger.Warnw("lsmod peermem check timed out", "error", err)
142+
}
138143
}
139144

140145
log.Logger.Infow("waiting for default nvml instance")

0 commit comments

Comments
 (0)