Skip to content

Commit 98fe42d

Browse files
authored
feat(controller): re-apply runtime-required taint on reboot when autoTaintNewNodes=true (#273)
When REAPPLY_ON_REBOOT=true, runtimeRequired=true, and autoTaintNewNodes=true are all set, a rebooted node must be re-tainted so workloads cannot schedule on it until Skyhook finishes re-applying. Previously, node.Reset() cleared the nodeState/cordon/status annotations but left the autoTaint annotation intact, so HasSkyhookAnnotations() remained true and HandleAutoTaint never re-tainted the node. The fix appends the runtime-required taint to the node inside the existing ReapplyOnReboot block, after node.Reset() and before the StrategicMerge patch, so the taint re-application and state reset land in one atomic apiserver write. Taint removal on completion is unchanged (HandleRuntimeRequiredTaint). Closes #180 Signed-off-by: Brian Lockwood <lockwobr@gmail.com>
1 parent aae1166 commit 98fe42d

3 files changed

Lines changed: 161 additions & 0 deletions

File tree

docs/runtime_required.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ When enabled, the operator automatically applies the runtime-required taint to n
3737

3838
A node is considered "new" if it has no Skyhook annotations. This works for both initial cluster setup (day 0) and nodes joining an existing cluster (day 2+). Nodes that have already been processed by Skyhook (and had their taint removed after completion) will not be re-tainted because they retain their Skyhook annotations.
3939

40+
**Exception: reboot with `REAPPLY_ON_REBOOT=true`.** When the operator is configured with `REAPPLY_ON_REBOOT=true` and a Skyhook has both `runtimeRequired: true` and `autoTaintNewNodes: true`, a node whose boot ID changes is treated as new for taint purposes. The runtime-required taint is re-applied alongside the state reset in the same atomic operation, ensuring no workloads can schedule on the rebooted node before Skyhook finishes re-applying. The taint is removed again by the normal completion path once all runtime-required Skyhooks finish on that node.
41+
4042
## What runtimeRequired: true will NOT do
4143

4244
1. Without `autoTaintNewNodes: true`, it will NOT add the taint to any nodes targeted by a SCR with `runtimeRequired: true`

operator/internal/controller/skyhook_controller.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,19 @@ func (r *SkyhookReconciler) TrackReboots(ctx context.Context, clusterState *clus
680680
r.recorder.Eventf(node.GetNode(), nil, EventTypeNormal, EventsReasonNodeReboot, "ResetNodeState", "detected reboot, resetting node for [%s] to be reapplied", node.GetSkyhook().Name)
681681
node.Reset()
682682

683+
// Re-apply the runtime-required taint so workloads cannot schedule on the
684+
// rebooted node until Skyhook finishes re-applying. The original auto-taint
685+
// annotation survives Reset() and remains the record that this taint is
686+
// operator-managed; no annotation update is needed.
687+
if skyhook.GetSkyhook().Spec.RuntimeRequired && skyhook.GetSkyhook().Spec.AutoTaintNewNodes {
688+
taintToAdd := r.opts.GetRuntimeRequiredTaint()
689+
newNode, updated, _ := taints.AddOrUpdateTaint(node.GetNode(), &taintToAdd)
690+
if updated {
691+
node.GetNode().Spec.Taints = newNode.Spec.Taints
692+
log.FromContext(ctx).Info("re-applying runtime-required taint after reboot", "node", node.GetNode().Name, "taint", taintToAdd.Key)
693+
}
694+
}
695+
683696
// Persist the reset before recording the new boot id. We Patch rather than
684697
// Update because a busy node's resourceVersion churns constantly under other
685698
// controllers, and a full Update would lose that optimistic-concurrency race; a

operator/internal/controller/skyhook_controller_test.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3795,3 +3795,149 @@ var _ = Describe("ProcessInterrupt skipped-package promotion", func() {
37953795
"a skipped package must keep waiting while the preempting interrupt is still pending")
37963796
})
37973797
})
3798+
3799+
var _ = Describe("TrackReboots auto-taint on reboot", func() {
3800+
const (
3801+
defaultRuntimeRequiredTaint = "skyhook.nvidia.com=runtime-required:NoSchedule"
3802+
oldBootID = "boot-A"
3803+
newBootID = "boot-B"
3804+
)
3805+
3806+
It("should re-apply the runtime-required taint when all three conditions are met", func() {
3807+
const (
3808+
skyhookName = "auto-taint-reboot-sh"
3809+
nodeName = "auto-taint-reboot-node"
3810+
)
3811+
nodeLabel := map[string]string{"auto-taint-reboot-test": "yes"}
3812+
autoTaintAnnotationKey := fmt.Sprintf("%s/autoTaint_skyhook.nvidia.com", v1alpha1.METADATA_PREFIX)
3813+
3814+
// Node arrives without the taint (it was removed after previous completion)
3815+
// but retains the autoTaint annotation from the original auto-taint.
3816+
node := &corev1.Node{
3817+
ObjectMeta: metav1.ObjectMeta{
3818+
Name: nodeName,
3819+
Labels: nodeLabel,
3820+
Annotations: map[string]string{
3821+
autoTaintAnnotationKey: "true",
3822+
},
3823+
},
3824+
}
3825+
Expect(k8sClient.Create(ctx, node)).To(Succeed())
3826+
DeferCleanup(func() { _ = k8sClient.Delete(ctx, node) })
3827+
3828+
node.Status.NodeInfo.BootID = newBootID
3829+
Expect(k8sClient.Status().Update(ctx, node)).To(Succeed())
3830+
snapshotNode := node.DeepCopy()
3831+
3832+
pkgRef := v1alpha1.PackageRef{Name: "pkg1", Version: "1.0.0"}
3833+
skyhook := v1alpha1.Skyhook{
3834+
ObjectMeta: metav1.ObjectMeta{Name: skyhookName},
3835+
Spec: v1alpha1.SkyhookSpec{
3836+
NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabel},
3837+
RuntimeRequired: true,
3838+
AutoTaintNewNodes: true,
3839+
Packages: v1alpha1.Packages{
3840+
pkgRef.Name: {PackageRef: pkgRef, Image: "ghcr.io/org/pkg1"},
3841+
},
3842+
},
3843+
Status: v1alpha1.SkyhookStatus{
3844+
NodeBootIds: map[string]string{nodeName: oldBootID},
3845+
},
3846+
}
3847+
3848+
clusterState, err := BuildState(
3849+
&v1alpha1.SkyhookList{Items: []v1alpha1.Skyhook{skyhook}},
3850+
&corev1.NodeList{Items: []corev1.Node{*snapshotNode}},
3851+
&v1alpha1.DeploymentPolicyList{},
3852+
)
3853+
Expect(err).ToNot(HaveOccurred())
3854+
Expect(clusterState.skyhooks[0].GetNodes()).To(HaveLen(1))
3855+
3856+
r := &SkyhookReconciler{
3857+
Client: k8sClient,
3858+
recorder: operator.recorder,
3859+
opts: SkyhookOperatorOptions{
3860+
ReapplyOnReboot: true,
3861+
RuntimeRequiredTaint: defaultRuntimeRequiredTaint,
3862+
},
3863+
}
3864+
3865+
_, err = r.TrackReboots(ctx, clusterState)
3866+
// Status().Update fails because the Skyhook is in-memory only; the node patch is what matters.
3867+
if err != nil {
3868+
Expect(err.Error()).ToNot(ContainSubstring("node after reboot"),
3869+
"the node-state reset/taint write must not fail")
3870+
}
3871+
3872+
live := &corev1.Node{}
3873+
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nodeName}, live)).To(Succeed())
3874+
Expect(live.Spec.Taints).To(ContainElement(
3875+
Equal(corev1.Taint{Key: "skyhook.nvidia.com", Value: "runtime-required", Effect: corev1.TaintEffectNoSchedule}),
3876+
), "runtime-required taint must be re-applied after reboot when all three conditions are met")
3877+
})
3878+
3879+
It("should NOT re-apply the taint when AutoTaintNewNodes is false", func() {
3880+
const (
3881+
skyhookName = "no-auto-taint-reboot-sh"
3882+
nodeName = "no-auto-taint-reboot-node"
3883+
)
3884+
nodeLabel := map[string]string{"no-auto-taint-reboot-test": "yes"}
3885+
3886+
node := &corev1.Node{
3887+
ObjectMeta: metav1.ObjectMeta{
3888+
Name: nodeName,
3889+
Labels: nodeLabel,
3890+
},
3891+
}
3892+
Expect(k8sClient.Create(ctx, node)).To(Succeed())
3893+
DeferCleanup(func() { _ = k8sClient.Delete(ctx, node) })
3894+
3895+
node.Status.NodeInfo.BootID = newBootID
3896+
Expect(k8sClient.Status().Update(ctx, node)).To(Succeed())
3897+
snapshotNode := node.DeepCopy()
3898+
3899+
pkgRef := v1alpha1.PackageRef{Name: "pkg1", Version: "1.0.0"}
3900+
skyhook := v1alpha1.Skyhook{
3901+
ObjectMeta: metav1.ObjectMeta{Name: skyhookName},
3902+
Spec: v1alpha1.SkyhookSpec{
3903+
NodeSelector: metav1.LabelSelector{MatchLabels: nodeLabel},
3904+
RuntimeRequired: true,
3905+
AutoTaintNewNodes: false, // guard: disabled
3906+
Packages: v1alpha1.Packages{
3907+
pkgRef.Name: {PackageRef: pkgRef, Image: "ghcr.io/org/pkg1"},
3908+
},
3909+
},
3910+
Status: v1alpha1.SkyhookStatus{
3911+
NodeBootIds: map[string]string{nodeName: oldBootID},
3912+
},
3913+
}
3914+
3915+
clusterState, err := BuildState(
3916+
&v1alpha1.SkyhookList{Items: []v1alpha1.Skyhook{skyhook}},
3917+
&corev1.NodeList{Items: []corev1.Node{*snapshotNode}},
3918+
&v1alpha1.DeploymentPolicyList{},
3919+
)
3920+
Expect(err).ToNot(HaveOccurred())
3921+
3922+
r := &SkyhookReconciler{
3923+
Client: k8sClient,
3924+
recorder: operator.recorder,
3925+
opts: SkyhookOperatorOptions{
3926+
ReapplyOnReboot: true,
3927+
RuntimeRequiredTaint: defaultRuntimeRequiredTaint,
3928+
},
3929+
}
3930+
3931+
_, err = r.TrackReboots(ctx, clusterState)
3932+
if err != nil {
3933+
Expect(err.Error()).ToNot(ContainSubstring("node after reboot"),
3934+
"the node-state reset write must not fail")
3935+
}
3936+
3937+
live := &corev1.Node{}
3938+
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nodeName}, live)).To(Succeed())
3939+
Expect(live.Spec.Taints).ToNot(ContainElement(
3940+
HaveField("Key", "skyhook.nvidia.com"),
3941+
), "taint must NOT be re-applied when AutoTaintNewNodes is false")
3942+
})
3943+
})

0 commit comments

Comments
 (0)