Skip to content

Commit 776a34b

Browse files
committed
feat: coderabbit review comments changes
Signed-off-by: Tanisha goyal <[email protected]>
1 parent a216b57 commit 776a34b

File tree

3 files changed

+132
-73
lines changed

3 files changed

+132
-73
lines changed

tests/helpers/kube.go

Lines changed: 107 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2248,34 +2248,16 @@ func waitForDaemonSetRollout(ctx context.Context, t *testing.T, client klient.Cl
22482248
t.Logf("DaemonSet %s/%s rollout completed successfully", NVSentinelNamespace, name)
22492249
}
22502250

2251-
// updateContainerProcessingStrategy updates the processing strategy argument for a specific container.
2252-
func updateContainerProcessingStrategy(container *v1.Container) {
2253-
processingStrategyArg := "--processing-strategy=STORE_ONLY"
2254-
2255-
for j, arg := range container.Args {
2256-
if strings.HasPrefix(arg, "--processing-strategy=") {
2257-
container.Args[j] = processingStrategyArg
2258-
return
2259-
}
2260-
2261-
if arg == "--processing-strategy" && j+1 < len(container.Args) {
2262-
container.Args[j+1] = "STORE_ONLY"
2263-
return
2264-
}
2265-
}
2266-
2267-
container.Args = append(container.Args, processingStrategyArg)
2268-
}
2269-
2270-
// UpdateDaemonSetProcessingStrategy updates the daemonset to use STORE_ONLY processing strategy.
2271-
func UpdateDaemonSetProcessingStrategy(ctx context.Context, t *testing.T,
2272-
client klient.Client, daemonsetName string, containerName string) (*appsv1.DaemonSet, error) {
2251+
// UpdateDaemonSetArgs updates the daemonset with the specified arguments and completes the rollout.
2252+
func UpdateDaemonSetArgs(ctx context.Context, t *testing.T,
2253+
client klient.Client, daemonsetName string, containerName string,
2254+
args map[string]string) error {
22732255
t.Helper()
22742256

2275-
t.Logf("Updating daemonset %s/%s to use STORE_ONLY processing strategy", NVSentinelNamespace, daemonsetName)
2276-
22772257
var originalDaemonSet *appsv1.DaemonSet
22782258

2259+
t.Logf("Updating daemonset %s/%s with args %v", NVSentinelNamespace, daemonsetName, args)
2260+
22792261
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
22802262
daemonSet := &appsv1.DaemonSet{}
22812263
if err := client.Resources().Get(ctx, daemonsetName, NVSentinelNamespace, daemonSet); err != nil {
@@ -2286,18 +2268,18 @@ func UpdateDaemonSetProcessingStrategy(ctx context.Context, t *testing.T,
22862268
originalDaemonSet = daemonSet.DeepCopy()
22872269
}
22882270

2289-
for i := range daemonSet.Spec.Template.Spec.Containers {
2290-
container := &daemonSet.Spec.Template.Spec.Containers[i]
2291-
if container.Name == containerName {
2292-
updateContainerProcessingStrategy(container)
2271+
containers := daemonSet.Spec.Template.Spec.Containers
2272+
for i := range containers {
2273+
if containers[i].Name == containerName {
2274+
setArgsOnContainer(t, &containers[i], args)
22932275
break
22942276
}
22952277
}
22962278

22972279
return client.Resources().Update(ctx, daemonSet)
22982280
})
22992281
if err != nil {
2300-
return nil, err
2282+
return err
23012283
}
23022284

23032285
t.Logf("Waiting for daemonset %s/%s rollout to complete", NVSentinelNamespace, daemonsetName)
@@ -2306,34 +2288,35 @@ func UpdateDaemonSetProcessingStrategy(ctx context.Context, t *testing.T,
23062288
t.Logf("Waiting 10 seconds for daemonset pods to start")
23072289
time.Sleep(10 * time.Second)
23082290

2309-
return originalDaemonSet, nil
2291+
return nil
23102292
}
23112293

2312-
func RestoreDaemonSet(ctx context.Context, t *testing.T, client klient.Client,
2313-
originalDaemonSet *appsv1.DaemonSet, daemonsetName string,
2294+
func RemoveDaemonSetArgs(ctx context.Context, t *testing.T, client klient.Client,
2295+
daemonsetName string,
2296+
containerName string, args map[string]string,
23142297
) error {
23152298
t.Helper()
23162299

2317-
if originalDaemonSet == nil {
2318-
t.Log("No original daemonset to restore, skipping")
2319-
return nil
2320-
}
2321-
2322-
t.Logf("Restoring daemonset %s/%s to original state", NVSentinelNamespace, daemonsetName)
2300+
t.Logf("Removing args %v from daemonset %s/%s", args, NVSentinelNamespace, daemonsetName)
23232301

23242302
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
23252303
daemonSet := &appsv1.DaemonSet{}
23262304
if err := client.Resources().Get(ctx, daemonsetName, NVSentinelNamespace, daemonSet); err != nil {
23272305
return err
23282306
}
23292307

2330-
daemonSet.Spec.Template.Spec.Containers = originalDaemonSet.Spec.Template.Spec.Containers
2308+
containers := daemonSet.Spec.Template.Spec.Containers
2309+
2310+
for i := range containers {
2311+
if containers[i].Name == containerName {
2312+
removeArgsFromContainer(&containers[i], args)
2313+
break
2314+
}
2315+
}
23312316

23322317
return client.Resources().Update(ctx, daemonSet)
23332318
})
2334-
if err != nil {
2335-
return err
2336-
}
2319+
require.NoError(t, err, "failed to remove args from daemonset %s/%s", NVSentinelNamespace, daemonsetName)
23372320

23382321
t.Logf("Waiting for daemonset %s/%s rollout to complete after restoration", NVSentinelNamespace, daemonsetName)
23392322
waitForDaemonSetRollout(ctx, t, client, daemonsetName)
@@ -2343,6 +2326,88 @@ func RestoreDaemonSet(ctx context.Context, t *testing.T, client klient.Client,
23432326
return nil
23442327
}
23452328

2329+
// tryUpdateExistingArg attempts to update an existing argument at position j.
2330+
// Returns true if the argument was found and updated.
2331+
func tryUpdateExistingArg(container *v1.Container, j int, flag, value string) bool {
2332+
existingArg := container.Args[j]
2333+
2334+
// Match --flag=value style
2335+
if strings.HasPrefix(existingArg, flag+"=") {
2336+
if value != "" {
2337+
container.Args[j] = flag + "=" + value
2338+
} else {
2339+
container.Args[j] = flag
2340+
}
2341+
2342+
return true
2343+
}
2344+
2345+
// Match --flag or --flag value style
2346+
if existingArg == flag {
2347+
if value != "" {
2348+
if j+1 < len(container.Args) && !strings.HasPrefix(container.Args[j+1], "-") {
2349+
container.Args[j+1] = value
2350+
} else {
2351+
container.Args = append(container.Args[:j+1], append([]string{value}, container.Args[j+1:]...)...)
2352+
}
2353+
}
2354+
2355+
return true
2356+
}
2357+
2358+
return false
2359+
}
2360+
2361+
func setArgsOnContainer(t *testing.T, container *v1.Container, args map[string]string) {
2362+
t.Helper()
2363+
t.Logf("Setting args %v on container %s", args, container.Name)
2364+
2365+
for flag, value := range args {
2366+
found := false
2367+
2368+
for j := 0; j < len(container.Args); j++ {
2369+
if tryUpdateExistingArg(container, j, flag, value) {
2370+
found = true
2371+
break
2372+
}
2373+
}
2374+
2375+
if !found {
2376+
if value != "" {
2377+
container.Args = append(container.Args, flag+"="+value)
2378+
} else {
2379+
container.Args = append(container.Args, flag)
2380+
}
2381+
}
2382+
}
2383+
}
2384+
2385+
func removeArgsFromContainer(container *v1.Container, args map[string]string) {
2386+
for flag := range args {
2387+
for j := 0; j < len(container.Args); j++ {
2388+
existingArg := container.Args[j]
2389+
2390+
// Match --flag=value style
2391+
if strings.HasPrefix(existingArg, flag+"=") {
2392+
container.Args = append(container.Args[:j], container.Args[j+1:]...)
2393+
break
2394+
}
2395+
2396+
// Match --flag or --flag value style
2397+
2398+
if existingArg == flag {
2399+
if j+1 < len(container.Args) && !strings.HasPrefix(container.Args[j+1], "-") {
2400+
container.Args = append(container.Args[:j], container.Args[j+2:]...)
2401+
} else {
2402+
container.Args = append(container.Args[:j], container.Args[j+1:]...)
2403+
}
2404+
2405+
break
2406+
}
2407+
}
2408+
}
2409+
}
2410+
23462411
func GetDaemonSetPodOnWorkerNode(ctx context.Context, t *testing.T, client klient.Client,
23472412
daemonsetName string, podNamePattern string) (*v1.Pod, error) {
23482413
t.Helper()

tests/helpers/syslog-health-monitor.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"testing"
2020

2121
"github.com/stretchr/testify/require"
22-
appsv1 "k8s.io/api/apps/v1"
2322
v1 "k8s.io/api/core/v1"
2423
"sigs.k8s.io/e2e-framework/klient"
2524
)
@@ -32,15 +31,13 @@ const (
3231

3332
// helper function to set up syslog health monitor and port forward to it
3433
func SetUpSyslogHealthMonitor(ctx context.Context, t *testing.T,
35-
client klient.Client, updatedDaemonSet bool) (string, *appsv1.DaemonSet, *v1.Pod, chan struct{}) {
36-
var originalDaemonSet *appsv1.DaemonSet
37-
34+
client klient.Client, args map[string]string) (string, *v1.Pod, chan struct{}) {
3835
var err error
3936

4037
var syslogPod *v1.Pod
4138

42-
if updatedDaemonSet {
43-
originalDaemonSet, err = UpdateDaemonSetProcessingStrategy(ctx, t, client, SyslogDaemonSetName, SyslogContainerName)
39+
if args != nil {
40+
err = UpdateDaemonSetArgs(ctx, t, client, SyslogDaemonSetName, SyslogContainerName, args)
4441
require.NoError(t, err, "failed to update syslog health monitor processing strategy")
4542
}
4643

@@ -70,18 +67,18 @@ func SetUpSyslogHealthMonitor(ctx context.Context, t *testing.T,
7067
err = SetNodeManagedByNVSentinel(ctx, client, testNodeName, false)
7168
require.NoError(t, err, "failed to set ManagedByNVSentinel label")
7269

73-
return testNodeName, originalDaemonSet, syslogPod, stopChan
70+
return testNodeName, syslogPod, stopChan
7471
}
7572

7673
// helper function to roll back syslog health monitor daemonset and stop the port forward
7774
func TearDownSyslogHealthMonitor(ctx context.Context, t *testing.T, client klient.Client,
78-
originalDaemonSet *appsv1.DaemonSet, nodeName string, stopChan chan struct{},
79-
updatedDaemonSet bool, podName string) {
75+
nodeName string, stopChan chan struct{},
76+
args map[string]string, podName string) {
8077
t.Log("Stopping port-forward")
8178
close(stopChan)
8279

83-
if updatedDaemonSet {
84-
err := RestoreDaemonSet(ctx, t, client, originalDaemonSet, "syslog-health-monitor-regular")
80+
if args != nil {
81+
err := RemoveDaemonSetArgs(ctx, t, client, SyslogDaemonSetName, SyslogContainerName, args)
8582
require.NoError(t, err, "failed to restore syslog health monitor daemon set")
8683
}
8784

tests/syslog_health_monitor_test.go

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
"tests/helpers"
2626

2727
"github.com/stretchr/testify/require"
28-
appsv1 "k8s.io/api/apps/v1"
2928
v1 "k8s.io/api/core/v1"
3029
"sigs.k8s.io/e2e-framework/pkg/envconf"
3130
"sigs.k8s.io/e2e-framework/pkg/features"
@@ -34,10 +33,9 @@ import (
3433
type contextKey string
3534

3635
const (
37-
keySyslogNodeName contextKey = "nodeName"
38-
keyStopChan contextKey = "stopChan"
39-
keySyslogPodName contextKey = "syslogPodName"
40-
keySyslogDaemonSet contextKey = "syslogDaemonSet"
36+
keySyslogNodeName contextKey = "nodeName"
37+
keyStopChan contextKey = "stopChan"
38+
keySyslogPodName contextKey = "syslogPodName"
4139
)
4240

4341
// TestSyslogHealthMonitorXIDDetection tests burst XID injection and aggregation
@@ -50,13 +48,12 @@ func TestSyslogHealthMonitorXIDDetection(t *testing.T) {
5048
client, err := c.NewClient()
5149
require.NoError(t, err, "failed to create kubernetes client")
5250

53-
testNodeName, originalDaemonSet, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, false)
51+
testNodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, nil)
5452
require.NoError(t, err, "failed to set up syslog health monitor")
5553

5654
ctx = context.WithValue(ctx, keySyslogNodeName, testNodeName)
5755
ctx = context.WithValue(ctx, keySyslogPodName, syslogPod.Name)
5856
ctx = context.WithValue(ctx, keyStopChan, stopChan)
59-
ctx = context.WithValue(ctx, keySyslogDaemonSet, originalDaemonSet)
6057

6158
return ctx
6259
})
@@ -165,11 +162,10 @@ func TestSyslogHealthMonitorXIDDetection(t *testing.T) {
165162
require.NoError(t, err, "failed to create kubernetes client")
166163

167164
nodeName := ctx.Value(keySyslogNodeName).(string)
168-
originalDaemonSet := ctx.Value(keySyslogDaemonSet).(*appsv1.DaemonSet)
169165
syslogPod := ctx.Value(keySyslogPodName).(string)
170166
stopChan := ctx.Value(keyStopChan).(chan struct{})
171167

172-
helpers.TearDownSyslogHealthMonitor(ctx, t, client, originalDaemonSet, nodeName, stopChan, false, syslogPod)
168+
helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, nil, syslogPod)
173169

174170
return ctx
175171
})
@@ -301,10 +297,9 @@ func TestSyslogHealthMonitorSXIDDetection(t *testing.T) {
301297
client, err := c.NewClient()
302298
require.NoError(t, err, "failed to create kubernetes client")
303299

304-
testNodeName, originalDaemonSet, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, false)
300+
testNodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, nil)
305301

306302
ctx = context.WithValue(ctx, keySyslogNodeName, testNodeName)
307-
ctx = context.WithValue(ctx, keySyslogDaemonSet, originalDaemonSet)
308303
ctx = context.WithValue(ctx, keySyslogPodName, syslogPod.Name)
309304
ctx = context.WithValue(ctx, keyStopChan, stopChan)
310305
return ctx
@@ -354,10 +349,9 @@ func TestSyslogHealthMonitorSXIDDetection(t *testing.T) {
354349

355350
nodeName := ctx.Value(keySyslogNodeName).(string)
356351
stopChan := ctx.Value(keyStopChan).(chan struct{})
357-
originalDaemonSet := ctx.Value(keySyslogDaemonSet).(*appsv1.DaemonSet)
358352
syslogPod := ctx.Value(keySyslogPodName).(string)
359353

360-
helpers.TearDownSyslogHealthMonitor(ctx, t, client, originalDaemonSet, nodeName, stopChan, false, syslogPod)
354+
helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, nil, syslogPod)
361355

362356
return ctx
363357
})
@@ -375,16 +369,17 @@ func TestSyslogHealthMonitorStoreOnlyStrategy(t *testing.T) {
375369
client, err := c.NewClient()
376370
require.NoError(t, err, "failed to create kubernetes client")
377371

378-
testNodeName, originalDaemonSet, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, true)
372+
testNodeName, syslogPod, stopChan := helpers.SetUpSyslogHealthMonitor(ctx, t, client, map[string]string{
373+
"--processing-strategy": "STORE_ONLY",
374+
})
379375

380376
ctx = context.WithValue(ctx, keySyslogNodeName, testNodeName)
381377
ctx = context.WithValue(ctx, keyStopChan, stopChan)
382378
ctx = context.WithValue(ctx, keySyslogPodName, syslogPod.Name)
383-
ctx = context.WithValue(ctx, keySyslogDaemonSet, originalDaemonSet)
384379
return ctx
385380
})
386381

387-
feature.Assess("Inject XID errors and verify GPU lookup via NVSwitch topology", func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
382+
feature.Assess("Inject XID errors and verify no node condition is created when running in STORE_ONLY strategy", func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
388383
client, err := c.NewClient()
389384
require.NoError(t, err, "failed to create kubernetes client")
390385

@@ -396,10 +391,10 @@ func TestSyslogHealthMonitorStoreOnlyStrategy(t *testing.T) {
396391

397392
helpers.InjectSyslogMessages(t, helpers.StubJournalHTTPPort, xidMessages)
398393

399-
t.Log("Verifying no node condition is created when processing STORE_ONLY strategy")
394+
t.Log("Verifying no node condition is created")
400395
helpers.EnsureNodeConditionNotPresent(ctx, t, client, nodeName, "SysLogsXIDError")
401396

402-
t.Log("Verifying node was not cordoned when processing STORE_ONLY strategy")
397+
t.Log("Verifying node was not cordoned")
403398
helpers.AssertQuarantineState(ctx, t, client, nodeName, helpers.QuarantineAssertion{
404399
ExpectCordoned: false,
405400
ExpectAnnotation: false,
@@ -411,12 +406,14 @@ func TestSyslogHealthMonitorStoreOnlyStrategy(t *testing.T) {
411406
feature.Teardown(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context {
412407
client, err := c.NewClient()
413408
require.NoError(t, err, "failed to create kubernetes client")
409+
414410
nodeName := ctx.Value(keySyslogNodeName).(string)
415411
stopChan := ctx.Value(keyStopChan).(chan struct{})
416412
syslogPod := ctx.Value(keySyslogPodName).(string)
417413

418-
originalDaemonSet := ctx.Value(keySyslogDaemonSet).(*appsv1.DaemonSet)
419-
helpers.TearDownSyslogHealthMonitor(ctx, t, client, originalDaemonSet, nodeName, stopChan, true, syslogPod)
414+
helpers.TearDownSyslogHealthMonitor(ctx, t, client, nodeName, stopChan, map[string]string{
415+
"--processing-strategy": "EXECUTE_REMEDIATION",
416+
}, syslogPod)
420417

421418
return ctx
422419
})

0 commit comments

Comments
 (0)