Skip to content

Commit 9f439c6

Browse files
committed
feat: add SKYHOOK_NODE_ORDER env var for monotonic node ordering
Packages need to know their position for kubeadm upgrades (first node runs "kubeadm upgrade apply", subsequent nodes run "kubeadm upgrade node"). - Add NodeOrderOffset to SkyhookStatus, incremented via RemoveNodePriority - Add RemoveNodePriority/NodeOrder methods on *Skyhook wrapper - Replace 3 raw delete(NodePriority) sites with RemoveNodePriority - Add SKYHOOK_NODE_ORDER to getAgentConfigEnvVars for all pods - Add name tiebreaker to node sort for deterministic ordering - Reset NodeOrderOffset and NodePriority on CLI reset - Remove omitempty from NodePriority/NodeOrderOffset so nil/0 serialize in merge patches
1 parent b034540 commit 9f439c6

File tree

14 files changed

+374
-21
lines changed

14 files changed

+374
-21
lines changed

chart/templates/skyhook-crd.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,11 @@ spec:
712712
description: NodeBootIds tracks the boot ids of nodes for triggering
713713
on reboot
714714
type: object
715+
nodeOrderOffset:
716+
description: |-
717+
NodeOrderOffset tracks the cumulative count of nodes removed from NodePriority.
718+
Used with NodePriority to compute monotonic SKYHOOK_NODE_ORDER across batches.
719+
type: integer
715720
nodePriority:
716721
additionalProperties:
717722
format: date-time

k8s-tests/chainsaw/cli/reset/assert-nodes-reset.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33
#
44
#
@@ -29,4 +29,11 @@ metadata:
2929
(contains(keys(@), "skyhook.nvidia.com/cordon_cli-reset-test")): false
3030
skyhook.nvidia.com/status_cli-reset-test: disabled ## disabled after clearing
3131
# skyhook.nvidia.com/version_cli-reset-test: .* ## also reset after clearing
32-
32+
---
33+
# Verify node ordering is reset after CLI reset
34+
apiVersion: skyhook.nvidia.com/v1alpha1
35+
kind: Skyhook
36+
metadata:
37+
name: cli-reset-test
38+
status:
39+
nodeOrderOffset: 0

k8s-tests/chainsaw/skyhook/strict-order/assert-both-nodes-complete.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ metadata:
2929
skyhook.nvidia.com/status_strict-order-skyhook-c: complete
3030
skyhook.nvidia.com/status_strict-order-skyhook-d: disabled
3131
---
32+
# Verify nodeOrderOffset incremented as nodes completed and were pruned from NodePriority
33+
# With 2 nodes, each skyhook should have offset=2 after both complete
34+
apiVersion: skyhook.nvidia.com/v1alpha1
35+
kind: Skyhook
36+
metadata:
37+
name: strict-order-skyhook-zzz
38+
status:
39+
nodeOrderOffset: 2
40+
(length(nodePriority || `{}`)): 0
41+
---
3242
# Worker2 node complete (d still disabled)
3343
apiVersion: v1
3444
kind: Node

k8s-tests/chainsaw/skyhook/strict-order/chainsaw-test.yaml

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,39 @@ spec:
7777
try:
7878
- sleep:
7979
duration: 5s
80+
# Verify SKYHOOK_NODE_ORDER is set on the config pod for zzz on kind-worker (before zzz completes)
81+
- assert:
82+
resource:
83+
apiVersion: v1
84+
kind: Pod
85+
metadata:
86+
namespace: skyhook
87+
labels:
88+
skyhook.nvidia.com/name: strict-order-skyhook-zzz
89+
skyhook.nvidia.com/package: foobar-1.2
90+
annotations:
91+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
92+
{
93+
"name": "foobar",
94+
"version": "1.2",
95+
"skyhook": "strict-order-skyhook-zzz",
96+
"stage": "config",
97+
"image": "ghcr.io/nvidia/skyhook/agentless"
98+
}
99+
spec:
100+
nodeName: kind-worker
101+
initContainers:
102+
- name: foobar-init
103+
- name: foobar-config
104+
env:
105+
([5]):
106+
name: SKYHOOK_NODE_ORDER
107+
value: "0"
108+
- name: foobar-configcheck
109+
env:
110+
([5]):
111+
name: SKYHOOK_NODE_ORDER
112+
value: "0"
80113
- assert:
81114
file: assert-node1-priority1-complete-node2-blocked.yaml
82115
- script:
@@ -86,7 +119,6 @@ spec:
86119
../../metrics_test.py skyhook_node_status_count 1 -t skyhook_name=strict-order-skyhook-zzz -t status=blocked
87120
../../metrics_test.py skyhook_package_state_count 1 -t package_name=foobar -t package_version=1.2 -t skyhook_name=strict-order-skyhook-zzz -t state=complete
88121
89-
90122
# Unblock worker2 only (keep b paused) so it can catch up through zzz and gate
91123
- name: unblock-worker2
92124
try:
@@ -96,7 +128,39 @@ spec:
96128
content: |
97129
## Remove blocking taint from worker2 - it will catch up through zzz then gate
98130
kubectl taint node kind-worker2 test-block- 2>/dev/null || true
99-
131+
# Verify SKYHOOK_NODE_ORDER is set on the zzz config pod for kind-worker2 (after taint removed)
132+
- assert:
133+
resource:
134+
apiVersion: v1
135+
kind: Pod
136+
metadata:
137+
namespace: skyhook
138+
labels:
139+
skyhook.nvidia.com/name: strict-order-skyhook-zzz
140+
skyhook.nvidia.com/package: foobar-1.2
141+
annotations:
142+
("skyhook.nvidia.com/package" && parse_json("skyhook.nvidia.com/package")):
143+
{
144+
"name": "foobar",
145+
"version": "1.2",
146+
"skyhook": "strict-order-skyhook-zzz",
147+
"stage": "config",
148+
"image": "ghcr.io/nvidia/skyhook/agentless"
149+
}
150+
spec:
151+
nodeName: kind-worker2
152+
initContainers:
153+
- name: foobar-init
154+
- name: foobar-config
155+
env:
156+
([5]):
157+
name: SKYHOOK_NODE_ORDER
158+
value: "1"
159+
- name: foobar-configcheck
160+
env:
161+
([5]):
162+
name: SKYHOOK_NODE_ORDER
163+
value: "1"
100164

101165
# Phase 3b: Assert sequencing: all gate blocks worker from priority 3
102166
# Worker completes gate (priority 2) but must WAIT for worker2 to also complete gate

operator/api/v1alpha1/skyhook_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,10 @@ type SkyhookStatus struct {
371371
// NodePriority tracks what nodes we are working on. This makes the interrupt budgets sticky.
372372
NodePriority map[string]metav1.Time `json:"nodePriority,omitempty"`
373373

374+
// NodeOrderOffset tracks the cumulative count of nodes removed from NodePriority.
375+
// Used with NodePriority to compute monotonic SKYHOOK_NODE_ORDER across batches.
376+
NodeOrderOffset int `json:"nodeOrderOffset,omitempty"`
377+
374378
// ConfigUpdates tracks config updates
375379
ConfigUpdates map[string][]string `json:"configUpdates,omitempty"`
376380

operator/cmd/cli/app/reset.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,14 +257,30 @@ func resetBatchStateForReset(ctx context.Context, cmd *cobra.Command, kubeClient
257257
return
258258
}
259259

260+
// Reset node ordering in memory
261+
skyhook.Status.NodeOrderOffset = 0
262+
skyhook.Status.NodePriority = nil
263+
260264
if len(skyhook.Status.CompartmentStatuses) == 0 {
265+
// No compartments — use raw patch to clear omitempty fields explicitly
266+
if err := utils.PatchSkyhookStatusRaw(ctx, kubeClient.Dynamic(), skyhookName,
267+
[]byte(`{"status":{"nodeOrderOffset":0,"nodePriority":null}}`)); err != nil {
268+
_, _ = fmt.Fprintf(cmd.ErrOrStderr(), "Warning: failed to reset node ordering: %v\n", err)
269+
}
261270
return
262271
}
263272

273+
// With compartments, PatchSkyhookStatus writes the full status.
274+
// NodeOrderOffset=0 is dropped by omitempty, so follow up with raw patch.
264275
skyhook.ResetCompartmentBatchStates()
265276
if err := utils.PatchSkyhookStatus(ctx, kubeClient.Dynamic(), skyhookName, skyhook.Status); err != nil {
266277
_, _ = fmt.Fprintf(cmd.ErrOrStderr(), "Warning: failed to reset batch state: %v\n", err)
267278
return
268279
}
280+
// Raw patch to clear omitempty fields that PatchSkyhookStatus can't zero
281+
if err := utils.PatchSkyhookStatusRaw(ctx, kubeClient.Dynamic(), skyhookName,
282+
[]byte(`{"status":{"nodeOrderOffset":0,"nodePriority":null}}`)); err != nil {
283+
_, _ = fmt.Fprintf(cmd.ErrOrStderr(), "Warning: failed to reset node ordering: %v\n", err)
284+
}
269285
_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Batch state reset for Skyhook %q\n", skyhookName)
270286
}

operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,11 @@ spec:
719719
description: NodeBootIds tracks the boot ids of nodes for triggering
720720
on reboot
721721
type: object
722+
nodeOrderOffset:
723+
description: |-
724+
NodeOrderOffset tracks the cumulative count of nodes removed from NodePriority.
725+
Used with NodePriority to compute monotonic SKYHOOK_NODE_ORDER across batches.
726+
type: integer
722727
nodePriority:
723728
additionalProperties:
724729
format: date-time

operator/internal/cli/utils/utils.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,24 @@ func ExtractImageTag(image string) string {
456456
return strings.TrimSpace(parts[len(parts)-1])
457457
}
458458

459+
// PatchSkyhookStatusRaw patches the skyhook status with raw JSON bytes.
460+
// Use this when you need explicit null values to clear omitempty fields.
461+
func PatchSkyhookStatusRaw(ctx context.Context, dynamicClient dynamic.Interface, skyhookName string, patchBytes []byte) error {
462+
gvr := v1alpha1.GroupVersion.WithResource("skyhooks")
463+
_, err := dynamicClient.Resource(gvr).Patch(
464+
ctx,
465+
skyhookName,
466+
types.MergePatchType,
467+
patchBytes,
468+
metav1.PatchOptions{},
469+
"status",
470+
)
471+
if err != nil {
472+
return fmt.Errorf("patching skyhook %q status: %w", skyhookName, err)
473+
}
474+
return nil
475+
}
476+
459477
// PatchSkyhookStatus patches the status subresource of a Skyhook CR using the dynamic client.
460478
// This is used to update status fields without triggering a spec update.
461479
func PatchSkyhookStatus(ctx context.Context, dynamicClient dynamic.Interface, skyhookName string, status v1alpha1.SkyhookStatus) error {

operator/internal/controller/cluster_state_v2.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,12 @@ func BuildState(skyhooks *v1alpha1.SkyhookList, nodes *corev1.NodeList, deployme
144144

145145
for _, skyhook := range ret.skyhooks {
146146
sort.Slice(skyhook.GetNodes(), func(i, j int) bool {
147-
return skyhook.GetNodes()[i].GetNode().CreationTimestamp.Before(&skyhook.GetNodes()[j].GetNode().CreationTimestamp)
147+
ti := skyhook.GetNodes()[i].GetNode().CreationTimestamp
148+
tj := skyhook.GetNodes()[j].GetNode().CreationTimestamp
149+
if !ti.Equal(&tj) {
150+
return ti.Before(&tj)
151+
}
152+
return skyhook.GetNodes()[i].GetNode().Name < skyhook.GetNodes()[j].GetNode().Name
148153
})
149154
}
150155

@@ -648,8 +653,7 @@ func (s *NodePicker) primeAndPruneNodes(skyhook SkyhookNodes) {
648653
// prune
649654
// if the node is complete, remove it from the priority list
650655
if nodeStatus, _ := skyhook.GetNode(n); nodeStatus == v1alpha1.StatusComplete {
651-
delete(skyhook.GetSkyhook().Status.NodePriority, n)
652-
skyhook.GetSkyhook().Updated = true
656+
skyhook.GetSkyhook().RemoveNodePriority(n)
653657
} else {
654658
s.priorityNodes[n] = t.Time
655659
}
@@ -1458,11 +1462,17 @@ func CleanupRemovedNodes(skyhook SkyhookNodes) {
14581462

14591463
status := skyhook.GetSkyhook().Status
14601464

1461-
// Check and remove nodes from all status maps
1465+
// NodePriority needs special handling to track offset
1466+
for name := range status.NodePriority {
1467+
if _, ok := currentNodeNames[name]; !ok {
1468+
skyhook.GetSkyhook().RemoveNodePriority(name)
1469+
}
1470+
}
1471+
1472+
// Check and remove nodes from remaining status maps
14621473
change := cleanupNodeMap(status.NodeState, currentNodeNames)
14631474
change = cleanupNodeMap(status.NodeStatus, currentNodeNames) || change
14641475
change = cleanupNodeMap(status.NodeBootIds, currentNodeNames) || change
1465-
change = cleanupNodeMap(status.NodePriority, currentNodeNames) || change
14661476

14671477
// Only set Updated flag if there were changes
14681478
if change {

operator/internal/controller/cluster_state_v2_test.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,9 @@ var _ = Describe("CleanupRemovedNodes", func() {
901901
Expect(mockSkyhook.Status.ConfigUpdates).To(HaveKey("package2"))
902902
Expect(mockSkyhook.Status.ConfigUpdates).To(HaveKey("package3"))
903903

904+
// Verify NodeOrderOffset was incremented for the removed node
905+
Expect(mockSkyhook.Status.NodeOrderOffset).To(Equal(1))
906+
904907
// Verify that Updated flag was set since changes were made
905908
Expect(mockSkyhook.Updated).To(BeTrue())
906909
})
@@ -963,7 +966,10 @@ var _ = Describe("CleanupRemovedNodes", func() {
963966
// ConfigUpdates should remain unchanged since it's keyed by package names, not node names
964967
Expect(mockSkyhook.Status.ConfigUpdates).To(HaveKey("package1"))
965968

966-
// Verify that Updated flag was set since changes were made
969+
// Verify NodeOrderOffset was NOT incremented
970+
Expect(mockSkyhook.Status.NodeOrderOffset).To(Equal(0))
971+
972+
// Verify that Updated flag was NOT set since no changes were made
967973
Expect(mockSkyhook.Updated).To(BeFalse())
968974
})
969975

0 commit comments

Comments
 (0)