Skip to content

Commit f1efd9c

Browse files
committed
feat(cli): gate update-state and reset --package on operator version; clone NodeState before mutation; add chainsaw rejection coverage
Signed-off-by: Alex Yuskauskas <ayuskauskas@nvidia.com>
1 parent 8473f6f commit f1efd9c

8 files changed

Lines changed: 274 additions & 7 deletions

File tree

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
apiVersion: skyhook.nvidia.com/v1alpha1
18+
kind: Skyhook
19+
metadata:
20+
name: cli-update-state-reject-test
21+
status:
22+
status: complete
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json
18+
apiVersion: chainsaw.kyverno.io/v1alpha1
19+
kind: Test
20+
metadata:
21+
name: cli-update-state-reject
22+
spec:
23+
timeouts:
24+
assert: 120s
25+
exec: 90s
26+
steps:
27+
# Step 0: Reset state from previous runs
28+
- name: reset-state
29+
try:
30+
- script:
31+
timeout: 30s
32+
content: |
33+
../skyhook-cli reset cli-update-state-reject-test --confirm 2>/dev/null || true
34+
echo "State reset complete"
35+
36+
# Step 1: Create a Skyhook and wait for it to complete
37+
- name: setup-skyhook
38+
try:
39+
- apply:
40+
file: skyhook.yaml
41+
- assert:
42+
file: assert-skyhook-complete.yaml
43+
44+
# Step 2: update-state must reject a package that is not in the Skyhook spec
45+
- name: reject-unknown-package
46+
try:
47+
- script:
48+
timeout: 30s
49+
content: |
50+
set -e
51+
out=$(../skyhook-cli update-state cli-update-state-reject-test nonexistent-pkg 1.0.0 apply complete --confirm 2>&1) && {
52+
echo "expected non-zero exit, got success: $out"
53+
exit 1
54+
}
55+
echo "$out" | grep -qi "not found in spec" || {
56+
echo "expected error mentioning 'not found in spec', got: $out"
57+
exit 1
58+
}
59+
echo "unknown package correctly rejected"
60+
61+
# Step 3: update-state must reject an invalid stage value
62+
- name: reject-invalid-stage
63+
try:
64+
- script:
65+
timeout: 30s
66+
content: |
67+
set -e
68+
out=$(../skyhook-cli update-state cli-update-state-reject-test test-package 1.0.0 bogus-stage complete --confirm 2>&1) && {
69+
echo "expected non-zero exit, got success: $out"
70+
exit 1
71+
}
72+
echo "$out" | grep -qi "invalid stage" || {
73+
echo "expected error mentioning 'invalid stage', got: $out"
74+
exit 1
75+
}
76+
echo "invalid stage correctly rejected"
77+
78+
# Step 4: update-state must reject an invalid state value
79+
- name: reject-invalid-state
80+
try:
81+
- script:
82+
timeout: 30s
83+
content: |
84+
set -e
85+
out=$(../skyhook-cli update-state cli-update-state-reject-test test-package 1.0.0 apply weird-state --confirm 2>&1) && {
86+
echo "expected non-zero exit, got success: $out"
87+
exit 1
88+
}
89+
echo "$out" | grep -qi "invalid state" || {
90+
echo "expected error mentioning 'invalid state', got: $out"
91+
exit 1
92+
}
93+
echo "invalid state correctly rejected"
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
apiVersion: skyhook.nvidia.com/v1alpha1
18+
kind: Skyhook
19+
metadata:
20+
name: cli-update-state-reject-test
21+
spec:
22+
nodeSelectors:
23+
matchLabels:
24+
skyhook.nvidia.com/test-node: skyhooke2e
25+
packages:
26+
test-package:
27+
version: "1.0.0"
28+
image: ghcr.io/nvidia/skyhook-packages/shellscript
29+
configMap:
30+
apply.sh: |
31+
#!/bin/bash
32+
echo "update-state reject test apply"
33+
sleep 2

operator/cmd/cli/app/reset.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,14 @@ func runPackageReset(
308308
return err
309309
}
310310

311+
skyhook, err := utils.GetSkyhook(ctx, kubeClient.Dynamic(), skyhookName)
312+
if err != nil {
313+
return fmt.Errorf("fetching Skyhook %q: %w", skyhookName, err)
314+
}
315+
if err := checkNodeStateOperatorVersion(ctx, cmd, kubeClient, cliCtx, skyhook); err != nil {
316+
return err
317+
}
318+
311319
annotationKey := nodeStateAnnotationPrefix + skyhookName
312320

313321
type target struct {

operator/cmd/cli/app/reset_test.go

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,19 @@ import (
2828
. "github.com/onsi/ginkgo/v2"
2929
. "github.com/onsi/gomega"
3030
"github.com/spf13/cobra"
31+
"github.com/stretchr/testify/mock"
3132
corev1 "k8s.io/api/core/v1"
3233
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3335
"k8s.io/apimachinery/pkg/runtime"
36+
"k8s.io/apimachinery/pkg/runtime/schema"
3437
"k8s.io/client-go/kubernetes/fake"
3538
ktesting "k8s.io/client-go/testing"
3639

3740
"github.com/NVIDIA/nodewright/operator/api/v1alpha1"
3841
"github.com/NVIDIA/nodewright/operator/internal/cli/client"
3942
"github.com/NVIDIA/nodewright/operator/internal/cli/context"
43+
mockdynamic "github.com/NVIDIA/nodewright/operator/internal/mocks/dynamic"
4044
)
4145

4246
// Helper function to create a test node with nodeState annotation
@@ -355,16 +359,24 @@ var _ = Describe("Reset Command", func() {
355359

356360
Describe("--package", func() {
357361
var (
358-
fakeKube *fake.Clientset
359-
kubeClient *client.Client
360-
cliCtx *context.CLIContext
361-
cmd *cobra.Command
362-
out *bytes.Buffer
362+
fakeKube *fake.Clientset
363+
mockDynamic *mockdynamic.Interface
364+
kubeClient *client.Client
365+
cliCtx *context.CLIContext
366+
cmd *cobra.Command
367+
out *bytes.Buffer
363368
)
364369

370+
skyhookGVR := schema.GroupVersionResource{
371+
Group: "skyhook.nvidia.com",
372+
Version: "v1alpha1",
373+
Resource: "skyhooks",
374+
}
375+
365376
BeforeEach(func() {
366377
fakeKube = fake.NewClientset()
367-
kubeClient = client.NewWithClientsAndConfig(fakeKube, nil, nil)
378+
mockDynamic = &mockdynamic.Interface{}
379+
kubeClient = client.NewWithClientsAndConfig(fakeKube, mockDynamic, nil)
368380
cliCtx = context.NewCLIContext(nil)
369381
cmd = NewResetCmd(cliCtx)
370382
out = &bytes.Buffer{}
@@ -373,6 +385,22 @@ var _ = Describe("Reset Command", func() {
373385
cmd.SetIn(bytes.NewBufferString("y\n"))
374386
})
375387

388+
setupSkyhookCR := func(name, version string) {
389+
sk := &v1alpha1.Skyhook{}
390+
sk.Name = name
391+
sk.Annotations = map[string]string{
392+
"skyhook.nvidia.com/version": version,
393+
}
394+
u := &unstructured.Unstructured{}
395+
raw, err := json.Marshal(sk)
396+
Expect(err).NotTo(HaveOccurred())
397+
Expect(json.Unmarshal(raw, &u.Object)).To(Succeed())
398+
u.SetGroupVersionKind(schema.GroupVersionKind{Group: "skyhook.nvidia.com", Version: "v1alpha1", Kind: "Skyhook"})
399+
mockNSRes := &mockdynamic.NamespaceableResourceInterface{}
400+
mockDynamic.On("Resource", skyhookGVR).Return(mockNSRes)
401+
mockNSRes.On("Get", mock.Anything, name, mock.Anything, mock.Anything).Return(u, nil)
402+
}
403+
376404
addNode := func(name string, ns v1alpha1.NodeState) {
377405
raw, _ := json.Marshal(ns)
378406
n := &corev1.Node{
@@ -388,6 +416,7 @@ var _ = Describe("Reset Command", func() {
388416
}
389417

390418
It("removes only the named package entry from each node", func() {
419+
setupSkyhookCR("demo", "v0.15.0")
391420
addNode("n1", v1alpha1.NodeState{
392421
"pkg1|1.0": {Name: "pkg1", Version: "1.0", Stage: v1alpha1.StageApply, State: v1alpha1.StateComplete},
393422
"pkg2|2.0": {Name: "pkg2", Version: "2.0", Stage: v1alpha1.StageConfig, State: v1alpha1.StateComplete},
@@ -405,6 +434,7 @@ var _ = Describe("Reset Command", func() {
405434
})
406435

407436
It("with name:version only removes if version matches", func() {
437+
setupSkyhookCR("demo", "v0.15.0")
408438
addNode("n1", v1alpha1.NodeState{
409439
"pkg1|1.0": {Name: "pkg1", Version: "1.0", Stage: v1alpha1.StageApply, State: v1alpha1.StateComplete},
410440
})
@@ -420,6 +450,7 @@ var _ = Describe("Reset Command", func() {
420450
})
421451

422452
It("removes the entire annotation when the last entry is removed", func() {
453+
setupSkyhookCR("demo", "v0.15.0")
423454
addNode("n1", v1alpha1.NodeState{
424455
"pkg1|1.0": {Name: "pkg1", Version: "1.0", Stage: v1alpha1.StageApply, State: v1alpha1.StateComplete},
425456
})
@@ -440,5 +471,18 @@ var _ = Describe("Reset Command", func() {
440471
Entry("empty name with version", ":1.0"),
441472
Entry("empty version with colon", "pkg1:"),
442473
)
474+
475+
It("rejects --package against an operator older than v0.15.0", func() {
476+
setupSkyhookCR("demo", "v0.10.0")
477+
addNode("n1", v1alpha1.NodeState{
478+
"pkg1|1.0": {Name: "pkg1", Version: "1.0", Stage: v1alpha1.StageApply, State: v1alpha1.StateComplete},
479+
})
480+
opts := &resetOptions{confirm: true, skipBatchReset: true, pkg: "pkg1"}
481+
err := runReset(gocontext.Background(), cmd, kubeClient, "demo", opts, cliCtx)
482+
Expect(err).To(HaveOccurred())
483+
Expect(err.Error()).To(ContainSubstring("does not support"))
484+
Expect(err.Error()).To(ContainSubstring("v0.10.0"))
485+
Expect(err.Error()).To(ContainSubstring("v0.15.0"))
486+
})
443487
})
444488
})

operator/cmd/cli/app/update_state.go

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"context"
2323
"encoding/json"
2424
"fmt"
25+
"maps"
2526
"slices"
2627
"sort"
2728

@@ -169,7 +170,10 @@ func buildUpdateStateTarget(
169170
addMode bool,
170171
) (updateStateTarget, bool) {
171172
uniqueName := pkg.GetUniqueName()
172-
ns := nodeStates[name]
173+
// why: nodeStates[name] returns a shared map reference; mutating it would
174+
// silently mutate the caller's snapshot. Clone before mutation so this
175+
// helper stays safe even if it's ever called twice for the same node.
176+
ns := maps.Clone(nodeStates[name])
173177
if ns == nil {
174178
ns = v1alpha1.NodeState{}
175179
}
@@ -222,6 +226,11 @@ func runUpdateState(ctx context.Context, cmd *cobra.Command, kubeClient *client.
222226
if err != nil {
223227
return fmt.Errorf("fetching Skyhook %q: %w", skyhookName, err)
224228
}
229+
230+
if err := checkNodeStateOperatorVersion(ctx, cmd, kubeClient, cliCtx, skyhook); err != nil {
231+
return err
232+
}
233+
225234
pkg, ok := skyhook.Spec.Packages[packageName]
226235
if !ok || pkg.Version != packageVersion {
227236
return fmt.Errorf("package %q (version %q) not found in spec of Skyhook %q", packageName, packageVersion, skyhookName)
@@ -310,3 +319,32 @@ func runUpdateState(ctx context.Context, cmd *cobra.Command, kubeClient *client.
310319
_, _ = fmt.Fprintf(cmd.OutOrStdout(), "\nSuccessfully updated %d node(s)\n", success)
311320
return firstErr
312321
}
322+
323+
// checkNodeStateOperatorVersion rejects the call when the running operator is
324+
// older than MinNodeStateSupportVersion. The check first reads the version
325+
// annotation written by the operator onto the Skyhook CR; when that's missing
326+
// or non-semver (e.g. "dev") it falls back to inspecting the operator
327+
// Deployment. If neither source yields a valid version we warn but allow the
328+
// edit to proceed — better than refusing every command in clusters where the
329+
// CLI can't see the operator's namespace.
330+
func checkNodeStateOperatorVersion(
331+
ctx context.Context,
332+
cmd *cobra.Command,
333+
kubeClient *client.Client,
334+
cliCtx *cliContext.CLIContext,
335+
skyhook *v1alpha1.Skyhook,
336+
) error {
337+
opVersion := utils.GetSkyhookVersion(skyhook)
338+
if opVersion == "" || !utils.IsValidVersion(opVersion) {
339+
deployVersion, derr := utils.DiscoverOperatorVersion(ctx, kubeClient.Kubernetes(), cliCtx.GlobalFlags.Namespace())
340+
if derr == nil && utils.IsValidVersion(deployVersion) {
341+
opVersion = deployVersion
342+
} else {
343+
_, _ = fmt.Fprintf(cmd.ErrOrStderr(), "Warning: unable to determine operator version; cannot verify compatibility (requires %s+)\n", utils.MinNodeStateSupportVersion)
344+
}
345+
}
346+
if utils.IsValidVersion(opVersion) && utils.CompareVersions(opVersion, utils.MinNodeStateSupportVersion) < 0 {
347+
return fmt.Errorf("operator version %s does not support this command; minimum supported version is %s", opVersion, utils.MinNodeStateSupportVersion)
348+
}
349+
return nil
350+
}

operator/cmd/cli/app/update_state_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,30 @@ var _ = Describe("UpdateState Command", func() {
261261
Expect(err.Error()).To(ContainSubstring("apiserver unreachable"))
262262
})
263263

264+
It("rejects update-state against an operator older than v0.15.0", func() {
265+
sk := &v1alpha1.Skyhook{}
266+
sk.Name = "demo"
267+
sk.Annotations = map[string]string{"skyhook.nvidia.com/version": "v0.10.0"}
268+
sk.Spec.Packages = v1alpha1.Packages{
269+
"pkg1": {PackageRef: v1alpha1.PackageRef{Name: "pkg1", Version: "1.0"}, Image: "example.com/pkg1"},
270+
}
271+
u := &unstructured.Unstructured{}
272+
raw, err := json.Marshal(sk)
273+
Expect(err).NotTo(HaveOccurred())
274+
Expect(json.Unmarshal(raw, &u.Object)).To(Succeed())
275+
u.SetGroupVersionKind(schema.GroupVersionKind{Group: "skyhook.nvidia.com", Version: "v1alpha1", Kind: "Skyhook"})
276+
mockNSRes := &mockdynamic.NamespaceableResourceInterface{}
277+
mockDynamic.On("Resource", skyhookGVR).Return(mockNSRes)
278+
mockNSRes.On("Get", mock.Anything, "demo", mock.Anything, mock.Anything).Return(u, nil)
279+
280+
opts := &updateStateOptions{confirm: true}
281+
err = runUpdateState(gocontext.Background(), cmd, kubeClient, []string{"demo", "pkg1", "1.0", "config", "complete"}, opts, cliCtx)
282+
Expect(err).To(HaveOccurred())
283+
Expect(err.Error()).To(ContainSubstring("does not support"))
284+
Expect(err.Error()).To(ContainSubstring("v0.10.0"))
285+
Expect(err.Error()).To(ContainSubstring("v0.15.0"))
286+
})
287+
264288
Describe("--add", func() {
265289
It("requires --node or --selector", func() {
266290
setupSkyhookCR(map[string]string{"pkg1": "1.0"})

operator/internal/cli/utils/utils.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,11 @@ const (
327327
DefaultNamespace = "skyhook"
328328
// MinAnnotationSupportVersion is the minimum operator version that supports annotation-based pause/disable
329329
MinAnnotationSupportVersion = "v0.8.0"
330+
// MinNodeStateSupportVersion is the lowest operator version known to use the
331+
// current map[string]PackageStatus shape for the
332+
// skyhook.nvidia.com/nodeState_<skyhook> annotation. Commands that edit that
333+
// annotation refuse to run against older operators.
334+
MinNodeStateSupportVersion = "v0.15.0"
330335
)
331336

332337
// CompareVersions compares two semver versions.

0 commit comments

Comments
 (0)