
Commit 0a1dc9a

feat: coderabbit review comments changes
Signed-off-by: Tanisha goyal <[email protected]>
1 parent 009d4bd commit 0a1dc9a

3 files changed (+132, -73 lines changed)

tests/helpers/kube.go

Lines changed: 107 additions & 42 deletions
@@ -2272,34 +2272,16 @@ func waitForDaemonSetRollout(ctx context.Context, t *testing.T, client klient.Cl
 	t.Logf("DaemonSet %s/%s rollout completed successfully", NVSentinelNamespace, name)
 }
 
-// updateContainerProcessingStrategy updates the processing strategy argument for a specific container.
-func updateContainerProcessingStrategy(container *v1.Container) {
-	processingStrategyArg := "--processing-strategy=STORE_ONLY"
-
-	for j, arg := range container.Args {
-		if strings.HasPrefix(arg, "--processing-strategy=") {
-			container.Args[j] = processingStrategyArg
-			return
-		}
-
-		if arg == "--processing-strategy" && j+1 < len(container.Args) {
-			container.Args[j+1] = "STORE_ONLY"
-			return
-		}
-	}
-
-	container.Args = append(container.Args, processingStrategyArg)
-}
-
-// UpdateDaemonSetProcessingStrategy updates the daemonset to use STORE_ONLY processing strategy.
-func UpdateDaemonSetProcessingStrategy(ctx context.Context, t *testing.T,
-	client klient.Client, daemonsetName string, containerName string) (*appsv1.DaemonSet, error) {
+// UpdateDaemonSetArgs updates the daemonset with the specified arguments and completes the rollout.
+func UpdateDaemonSetArgs(ctx context.Context, t *testing.T,
+	client klient.Client, daemonsetName string, containerName string,
+	args map[string]string) error {
 	t.Helper()
 
-	t.Logf("Updating daemonset %s/%s to use STORE_ONLY processing strategy", NVSentinelNamespace, daemonsetName)
-
 	var originalDaemonSet *appsv1.DaemonSet
 
+	t.Logf("Updating daemonset %s/%s with args %v", NVSentinelNamespace, daemonsetName, args)
+
 	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		daemonSet := &appsv1.DaemonSet{}
 		if err := client.Resources().Get(ctx, daemonsetName, NVSentinelNamespace, daemonSet); err != nil {
@@ -2310,18 +2292,18 @@ func UpdateDaemonSetProcessingStrategy(ctx context.Context, t *testing.T,
 			originalDaemonSet = daemonSet.DeepCopy()
 		}
 
-		for i := range daemonSet.Spec.Template.Spec.Containers {
-			container := &daemonSet.Spec.Template.Spec.Containers[i]
-			if container.Name == containerName {
-				updateContainerProcessingStrategy(container)
+		containers := daemonSet.Spec.Template.Spec.Containers
+		for i := range containers {
+			if containers[i].Name == containerName {
+				setArgsOnContainer(t, &containers[i], args)
 				break
 			}
 		}
 
 		return client.Resources().Update(ctx, daemonSet)
 	})
 	if err != nil {
-		return nil, err
+		return err
 	}
 
 	t.Logf("Waiting for daemonset %s/%s rollout to complete", NVSentinelNamespace, daemonsetName)
@@ -2330,34 +2312,35 @@ func UpdateDaemonSetProcessingStrategy(ctx context.Context, t *testing.T,
 	t.Logf("Waiting 10 seconds for daemonset pods to start")
 	time.Sleep(10 * time.Second)
 
-	return originalDaemonSet, nil
+	return nil
 }
 
-func RestoreDaemonSet(ctx context.Context, t *testing.T, client klient.Client,
-	originalDaemonSet *appsv1.DaemonSet, daemonsetName string,
+func RemoveDaemonSetArgs(ctx context.Context, t *testing.T, client klient.Client,
+	daemonsetName string,
+	containerName string, args map[string]string,
 ) error {
 	t.Helper()
 
-	if originalDaemonSet == nil {
-		t.Log("No original daemonset to restore, skipping")
-		return nil
-	}
-
-	t.Logf("Restoring daemonset %s/%s to original state", NVSentinelNamespace, daemonsetName)
+	t.Logf("Removing args %v from daemonset %s/%s", args, NVSentinelNamespace, daemonsetName)
 
 	err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
 		daemonSet := &appsv1.DaemonSet{}
 		if err := client.Resources().Get(ctx, daemonsetName, NVSentinelNamespace, daemonSet); err != nil {
 			return err
 		}
 
-		daemonSet.Spec.Template.Spec.Containers = originalDaemonSet.Spec.Template.Spec.Containers
+		containers := daemonSet.Spec.Template.Spec.Containers
+
+		for i := range containers {
+			if containers[i].Name == containerName {
+				removeArgsFromContainer(&containers[i], args)
+				break
+			}
+		}
 
 		return client.Resources().Update(ctx, daemonSet)
 	})
-	if err != nil {
-		return err
-	}
+	require.NoError(t, err, "failed to remove args from daemonset %s/%s", NVSentinelNamespace, daemonsetName)
 
 	t.Logf("Waiting for daemonset %s/%s rollout to complete after restoration", NVSentinelNamespace, daemonsetName)
 	waitForDaemonSetRollout(ctx, t, client, daemonsetName)
@@ -2367,6 +2350,88 @@ func RestoreDaemonSet(ctx context.Context, t *testing.T, client klient.Client,
 	return nil
 }
 
+// tryUpdateExistingArg attempts to update an existing argument at position j.
+// Returns true if the argument was found and updated.
+func tryUpdateExistingArg(container *v1.Container, j int, flag, value string) bool {
+	existingArg := container.Args[j]
+
+	// Match --flag=value style
+	if strings.HasPrefix(existingArg, flag+"=") {
+		if value != "" {
+			container.Args[j] = flag + "=" + value
+		} else {
+			container.Args[j] = flag
+		}
+
+		return true
+	}
+
+	// Match --flag or --flag value style
+	if existingArg == flag {
+		if value != "" {
+			if j+1 < len(container.Args) && !strings.HasPrefix(container.Args[j+1], "-") {
+				container.Args[j+1] = value
+			} else {
+				container.Args = append(container.Args[:j+1], append([]string{value}, container.Args[j+1:]...)...)
+			}
+		}
+
+		return true
+	}
+
+	return false
+}
+
+func setArgsOnContainer(t *testing.T, container *v1.Container, args map[string]string) {
+	t.Helper()
+	t.Logf("Setting args %v on container %s", args, container.Name)
+
+	for flag, value := range args {
+		found := false
+
+		for j := 0; j < len(container.Args); j++ {
+			if tryUpdateExistingArg(container, j, flag, value) {
+				found = true
+				break
+			}
+		}
+
+		if !found {
+			if value != "" {
+				container.Args = append(container.Args, flag+"="+value)
+			} else {
+				container.Args = append(container.Args, flag)
+			}
+		}
+	}
+}
+
+func removeArgsFromContainer(container *v1.Container, args map[string]string) {
+	for flag := range args {
+		for j := 0; j < len(container.Args); j++ {
+			existingArg := container.Args[j]
+
+			// Match --flag=value style
+			if strings.HasPrefix(existingArg, flag+"=") {
+				container.Args = append(container.Args[:j], container.Args[j+1:]...)
+				break
+			}
+
+			// Match --flag or --flag value style
+			if existingArg == flag {
+				if j+1 < len(container.Args) && !strings.HasPrefix(container.Args[j+1], "-") {
+					container.Args = append(container.Args[:j], container.Args[j+2:]...)
+				} else {
+					container.Args = append(container.Args[:j], container.Args[j+1:]...)
+				}
+
+				break
+			}
+		}
+	}
+}
+
 func GetDaemonSetPodOnWorkerNode(ctx context.Context, t *testing.T, client klient.Client,
 	daemonsetName string, podNamePattern string) (*v1.Pod, error) {
 	t.Helper()
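
The flag-matching rules these new helpers apply are easiest to see in isolation. The snippet below is a standalone sketch, not part of this commit, and it simplifies away the empty-value branch that the committed tryUpdateExistingArg uses to set a bare flag; it mirrors the three update cases setArgsOnContainer handles: rewrite a --flag=value argument in place, rewrite the separate-value form --flag value, or append the flag when it is absent.

// Standalone illustration of the update rules (not code from this commit).
package main

import (
	"fmt"
	"strings"
)

func setFlag(args []string, flag, value string) []string {
	for j := 0; j < len(args); j++ {
		// "--flag=value" style: rewrite the whole element.
		if strings.HasPrefix(args[j], flag+"=") {
			args[j] = flag + "=" + value
			return args
		}
		// "--flag value" style: rewrite the following value if present,
		// otherwise splice the value in right after the flag.
		if args[j] == flag {
			if j+1 < len(args) && !strings.HasPrefix(args[j+1], "-") {
				args[j+1] = value
			} else {
				args = append(args[:j+1], append([]string{value}, args[j+1:]...)...)
			}
			return args
		}
	}
	// Flag absent: append in "--flag=value" form.
	return append(args, flag+"="+value)
}

func main() {
	args := []string{"--log-level=info", "--processing-strategy", "EXECUTE_REMEDIATION"}
	fmt.Println(setFlag(args, "--processing-strategy", "STORE_ONLY"))
	// Output: [--log-level=info --processing-strategy STORE_ONLY]
}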

tests/helpers/syslog-health-monitor.go

Lines changed: 8 additions & 11 deletions
@@ -19,7 +19,6 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/require"
-	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	"sigs.k8s.io/e2e-framework/klient"
 )
@@ -32,15 +31,13 @@ const (
 
 // helper function to set up syslog health monitor and port forward to it
 func SetUpSyslogHealthMonitor(ctx context.Context, t *testing.T,
-	client klient.Client, updatedDaemonSet bool) (string, *appsv1.DaemonSet, *v1.Pod, chan struct{}) {
-	var originalDaemonSet *appsv1.DaemonSet
-
+	client klient.Client, args map[string]string) (string, *v1.Pod, chan struct{}) {
 	var err error
 
 	var syslogPod *v1.Pod
 
-	if updatedDaemonSet {
-		originalDaemonSet, err = UpdateDaemonSetProcessingStrategy(ctx, t, client, SyslogDaemonSetName, SyslogContainerName)
+	if args != nil {
+		err = UpdateDaemonSetArgs(ctx, t, client, SyslogDaemonSetName, SyslogContainerName, args)
 		require.NoError(t, err, "failed to update syslog health monitor processing strategy")
 	}
@@ -70,18 +67,18 @@ func SetUpSyslogHealthMonitor(ctx context.Context, t *testing.T,
 	err = SetNodeManagedByNVSentinel(ctx, client, testNodeName, false)
 	require.NoError(t, err, "failed to set ManagedByNVSentinel label")
 
-	return testNodeName, originalDaemonSet, syslogPod, stopChan
+	return testNodeName, syslogPod, stopChan
 }
 
 // helper function to roll back syslog health monitor daemonset and stop the port forward
 func TearDownSyslogHealthMonitor(ctx context.Context, t *testing.T, client klient.Client,
-	originalDaemonSet *appsv1.DaemonSet, nodeName string, stopChan chan struct{},
-	updatedDaemonSet bool, podName string) {
+	nodeName string, stopChan chan struct{},
+	args map[string]string, podName string) {
 	t.Log("Stopping port-forward")
 	close(stopChan)
 
-	if updatedDaemonSet {
-		err := RestoreDaemonSet(ctx, t, client, originalDaemonSet, "syslog-health-monitor-regular")
+	if args != nil {
+		err := RemoveDaemonSetArgs(ctx, t, client, SyslogDaemonSetName, SyslogContainerName, args)
 		require.NoError(t, err, "failed to restore syslog health monitor daemon set")
 	}
 
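
The caller-facing change, exercised by the test file below: instead of a boolean plus a saved *appsv1.DaemonSet, callers pass nil to leave the daemonset untouched, or a flag map to patch the syslog container for the test's duration. A hedged sketch of the two call shapes (ctx, t, and client wired up the same way as in the tests below):

// nil args: no daemonset change, and teardown may also receive nil.
nodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, nil)

// Non-nil args: patch the container's flags and wait for the rollout.
// Teardown later strips flags with the same keys via RemoveDaemonSetArgs.
storeOnly := map[string]string{"--processing-strategy": "STORE_ONLY"}
nodeName, syslogPod, stopChan = helpers.SetUpSyslogHealthMonitor(ctx, t, client, storeOnly)
defer helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, storeOnly, syslogPod.Name)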

tests/syslog_health_monitor_test.go

Lines changed: 17 additions & 20 deletions
@@ -25,7 +25,6 @@ import (
 	"tests/helpers"
 
 	"github.com/stretchr/testify/require"
-	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	"sigs.k8s.io/e2e-framework/pkg/envconf"
 	"sigs.k8s.io/e2e-framework/pkg/features"
@@ -34,10 +33,9 @@ import (
 type contextKey string
 
 const (
-	keySyslogNodeName  contextKey = "nodeName"
-	keyStopChan        contextKey = "stopChan"
-	keySyslogPodName   contextKey = "syslogPodName"
-	keySyslogDaemonSet contextKey = "syslogDaemonSet"
+	keySyslogNodeName contextKey = "nodeName"
+	keyStopChan       contextKey = "stopChan"
+	keySyslogPodName  contextKey = "syslogPodName"
 )
 
 // TestSyslogHealthMonitorXIDDetection tests burst XID injection and aggregation
@@ -50,13 +48,12 @@ func TestSyslogHealthMonitorXIDDetection(t *testing.T) {
 		client, err := c.NewClient()
 		require.NoError(t, err, "failed to create kubernetes client")
 
-		testNodeName, originalDaemonSet, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, false)
+		testNodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, nil)
 		require.NoError(t, err, "failed to set up syslog health monitor")
 
 		ctx = context.WithValue(ctx, keySyslogNodeName, testNodeName)
 		ctx = context.WithValue(ctx, keySyslogPodName, syslogPod.Name)
 		ctx = context.WithValue(ctx, keyStopChan, stopChan)
-		ctx = context.WithValue(ctx, keySyslogDaemonSet, originalDaemonSet)
 
 		return ctx
 	})
@@ -165,11 +162,10 @@ func TestSyslogHealthMonitorXIDDetection(t *testing.T) {
 		require.NoError(t, err, "failed to create kubernetes client")
 
 		nodeName := ctx.Value(keySyslogNodeName).(string)
-		originalDaemonSet := ctx.Value(keySyslogDaemonSet).(*appsv1.DaemonSet)
 		syslogPod := ctx.Value(keySyslogPodName).(string)
 		stopChan := ctx.Value(keyStopChan).(chan struct{})
 
-		helpers.TearDownSyslogHealthMonitor(ctx, t, client, originalDaemonSet, nodeName, stopChan, false, syslogPod)
+		helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, nil, syslogPod)
 
 		return ctx
 	})
@@ -301,10 +297,9 @@ func TestSyslogHealthMonitorSXIDDetection(t *testing.T) {
 		client, err := c.NewClient()
 		require.NoError(t, err, "failed to create kubernetes client")
 
-		testNodeName, originalDaemonSet, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, false)
+		testNodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, nil)
 
 		ctx = context.WithValue(ctx, keySyslogNodeName, testNodeName)
-		ctx = context.WithValue(ctx, keySyslogDaemonSet, originalDaemonSet)
 		ctx = context.WithValue(ctx, keySyslogPodName, syslogPod.Name)
 		ctx = context.WithValue(ctx, keyStopChan, stopChan)
 		return ctx
@@ -354,10 +349,9 @@
 
 		nodeName := ctx.Value(keySyslogNodeName).(string)
 		stopChan := ctx.Value(keyStopChan).(chan struct{})
-		originalDaemonSet := ctx.Value(keySyslogDaemonSet).(*appsv1.DaemonSet)
 		syslogPod := ctx.Value(keySyslogPodName).(string)
 
-		helpers.TearDownSyslogHealthMonitor(ctx, t, client, originalDaemonSet, nodeName, stopChan, false, syslogPod)
+		helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, nil, syslogPod)
 
 		return ctx
 	})
@@ -375,16 +369,17 @@ func TestSyslogHealthMonitorStoreOnlyStrategy(t *testing.T) {
 		client, err := c.NewClient()
 		require.NoError(t, err, "failed to create kubernetes client")
 
-		testNodeName, originalDaemonSet, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, true)
+		testNodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, map[string]string{
+			"--processing-strategy": "STORE_ONLY",
+		})
 
 		ctx = context.WithValue(ctx, keySyslogNodeName, testNodeName)
 		ctx = context.WithValue(ctx, keyStopChan, stopChan)
 		ctx = context.WithValue(ctx, keySyslogPodName, syslogPod.Name)
-		ctx = context.WithValue(ctx, keySyslogDaemonSet, originalDaemonSet)
 		return ctx
 	})
 
-	feature.Assess("Inject XID errors and verify GPU lookup via NVSwitch topology", func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
+	feature.Assess("Inject XID errors and verify no node condition is created when running in STORE_ONLY strategy", func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
 		client, err := c.NewClient()
 		require.NoError(t, err, "failed to create kubernetes client")
 
@@ -396,10 +391,10 @@ func TestSyslogHealthMonitorStoreOnlyStrategy(t *testing.T) {
 
 		helpers.InjectSyslogMessages(t, helpers.StubJournalHTTPPort, xidMessages)
 
-		t.Log("Verifying no node condition is created when processing STORE_ONLY strategy")
+		t.Log("Verifying no node condition is created")
 		helpers.EnsureNodeConditionNotPresent(ctx, t, client, nodeName, "SysLogsXIDError")
 
-		t.Log("Verifying node was not cordoned when processing STORE_ONLY strategy")
+		t.Log("Verifying node was not cordoned")
 		helpers.AssertQuarantineState(ctx, t, client, nodeName, helpers.QuarantineAssertion{
 			ExpectCordoned:   false,
 			ExpectAnnotation: false,
@@ -411,12 +406,14 @@ func TestSyslogHealthMonitorStoreOnlyStrategy(t *testing.T) {
 	feature.Teardown(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
 		client, err := c.NewClient()
 		require.NoError(t, err, "failed to create kubernetes client")
+
 		nodeName := ctx.Value(keySyslogNodeName).(string)
 		stopChan := ctx.Value(keyStopChan).(chan struct{})
 		syslogPod := ctx.Value(keySyslogPodName).(string)
 
-		originalDaemonSet := ctx.Value(keySyslogDaemonSet).(*appsv1.DaemonSet)
-		helpers.TearDownSyslogHealthMonitor(ctx, t, client, originalDaemonSet, nodeName, stopChan, true, syslogPod)
+		helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, map[string]string{
+			"--processing-strategy": "EXECUTE_REMEDIATION",
+		}, syslogPod)
 
 		return ctx
 	})
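
One subtlety in the teardown above: RemoveDaemonSetArgs delegates to removeArgsFromContainer, which ranges only over the map's keys, so the EXECUTE_REMEDIATION value is never read; the --processing-strategy flag is simply stripped and the container falls back to its default. A standalone sketch of that key-only removal (illustrative, not code from this commit):

package main

import (
	"fmt"
	"strings"
)

// removeFlag mirrors removeArgsFromContainer for a single flag: it drops
// "--flag=value" as one element, or "--flag value" as two.
func removeFlag(args []string, flag string) []string {
	for j := 0; j < len(args); j++ {
		if strings.HasPrefix(args[j], flag+"=") {
			return append(args[:j], args[j+1:]...)
		}
		if args[j] == flag {
			if j+1 < len(args) && !strings.HasPrefix(args[j+1], "-") {
				return append(args[:j], args[j+2:]...)
			}
			return append(args[:j], args[j+1:]...)
		}
	}
	return args
}

func main() {
	args := []string{"--log-level=info", "--processing-strategy=STORE_ONLY"}
	// The map value is irrelevant to removal; only the key is consulted.
	for flag := range map[string]string{"--processing-strategy": "EXECUTE_REMEDIATION"} {
		args = removeFlag(args, flag)
	}
	fmt.Println(args) // [--log-level=info]
}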
