Skip to content

Commit 72dbcf0

Browse files
authored
feat: RDMA, SR-IOV, Multus auto-restart pods on config changes (#2035)
Fixes #1831 Adds automatic pod restart when device plugin ConfigMap content changes. A SHA256 hash of the config is added to the DaemonSet pod template annotation (nvidia.network-operator.config-hash). When the config changes, the hash changes, triggering a Kubernetes rolling update. Changes: - pkg/utils/utils.go - Added GetStringHash() function - pkg/consts/consts.go - Added ConfigHashAnnotation constant - pkg/state/state_skel.go - Added SetConfigHashAnnotation() helper - pkg/state/state_rdma_shared_device_plugin.go - Uses config hash - pkg/state/state_sriov_dp.go - Uses config hash - pkg/state/state_multus_cni.go - Uses config hash - Unit tests added
2 parents f9cb5cb + a64bcf4 commit 72dbcf0

File tree

8 files changed

+226
-0
lines changed

8 files changed

+226
-0
lines changed

pkg/consts/consts.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,6 @@ const (
5050
OfedDriverSkipDrainLabelSelector = "nvidia.com/ofed-driver-upgrade-drain.skip!=true"
5151
// ControllerRevisionAnnotation is the key for annotations used to store revision information on Kubernetes objects.
5252
ControllerRevisionAnnotation = "nvidia.network-operator.revision"
53+
// ConfigHashAnnotation is the key for annotations used to store config hash on pod templates.
54+
ConfigHashAnnotation = "nvidia.network-operator.config-hash"
5355
)

pkg/state/state_multus_cni.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,13 @@ func (s *stateMultusCNI) GetManifestObjects(
165165
return nil, errors.Wrap(err, "failed to render objects")
166166
}
167167

168+
if cr.Spec.SecondaryNetwork.Multus.Config != nil {
169+
configHash := utils.GetStringHash(*cr.Spec.SecondaryNetwork.Multus.Config)
170+
if err := SetConfigHashAnnotation(objs, configHash); err != nil {
171+
return nil, errors.Wrap(err, "failed to set config hash annotation")
172+
}
173+
}
174+
168175
reqLogger.V(consts.LogLevelDebug).Info("Rendered", "objects:", objs)
169176
return objs, nil
170177
}

pkg/state/state_rdma_shared_device_plugin.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ func (s *stateRDMASharedDevicePlugin) GetManifestObjects(
147147
if clusterInfo == nil {
148148
return nil, errors.New("clusterInfo provider required")
149149
}
150+
150151
renderData := &stateRDMASharedDevicePluginManifestRenderData{
151152
CrSpec: cr.Spec.RdmaSharedDevicePlugin,
152153
Tolerations: cr.Spec.Tolerations,
@@ -164,6 +165,14 @@ func (s *stateRDMASharedDevicePlugin) GetManifestObjects(
164165
if err != nil {
165166
return nil, errors.Wrap(err, "failed to render objects")
166167
}
168+
169+
if cr.Spec.RdmaSharedDevicePlugin.Config != nil {
170+
configHash := utils.GetStringHash(*cr.Spec.RdmaSharedDevicePlugin.Config)
171+
if err := SetConfigHashAnnotation(objs, configHash); err != nil {
172+
return nil, errors.Wrap(err, "failed to set config hash annotation")
173+
}
174+
}
175+
167176
reqLogger.V(consts.LogLevelDebug).Info("Rendered", "objects:", objs)
168177
return objs, nil
169178
}

pkg/state/state_skel.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,3 +471,33 @@ func (s *stateSkel) isDaemonSetReady(uds *unstructured.Unstructured, reqLogger l
471471
func (s *stateSkel) SetRenderer(renderer render.Renderer) {
472472
s.renderer = renderer
473473
}
474+
475+
// SetConfigHashAnnotation sets config hash annotation on DaemonSet pod templates.
476+
func SetConfigHashAnnotation(objs []*unstructured.Unstructured, configHash string) error {
477+
if configHash == "" {
478+
return nil
479+
}
480+
481+
for _, obj := range objs {
482+
if obj.GetKind() != "DaemonSet" {
483+
continue
484+
}
485+
486+
annotations, found, err := unstructured.NestedStringMap(obj.Object,
487+
"spec", "template", "metadata", "annotations")
488+
if err != nil {
489+
return errors.Wrap(err, "failed to get pod template annotations")
490+
}
491+
if !found || annotations == nil {
492+
annotations = make(map[string]string)
493+
}
494+
495+
annotations[consts.ConfigHashAnnotation] = configHash
496+
497+
if err := unstructured.SetNestedStringMap(obj.Object, annotations,
498+
"spec", "template", "metadata", "annotations"); err != nil {
499+
return errors.Wrap(err, "failed to set pod template annotations")
500+
}
501+
}
502+
return nil
503+
}

pkg/state/state_skel_test.go

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,135 @@ var _ = Describe("stateSkel", func() {
8080
})
8181
})
8282
})
83+
84+
var _ = Describe("SetConfigHashAnnotation", func() {
85+
Context("when configHash is empty", func() {
86+
It("should not modify any objects", func() {
87+
ds := createTestDaemonSet("test-ds")
88+
objs := []*unstructured.Unstructured{ds}
89+
err := SetConfigHashAnnotation(objs, "")
90+
Expect(err).NotTo(HaveOccurred())
91+
92+
annotations, found, err := unstructured.NestedStringMap(ds.Object,
93+
"spec", "template", "metadata", "annotations")
94+
Expect(err).NotTo(HaveOccurred())
95+
Expect(found).To(BeFalse())
96+
Expect(annotations).To(BeNil())
97+
})
98+
})
99+
100+
Context("when configHash is provided", func() {
101+
It("should add annotation to DaemonSet pod template", func() {
102+
ds := createTestDaemonSet("test-ds")
103+
objs := []*unstructured.Unstructured{ds}
104+
configHash := "abc123hash"
105+
106+
err := SetConfigHashAnnotation(objs, configHash)
107+
Expect(err).NotTo(HaveOccurred())
108+
109+
annotations, found, err := unstructured.NestedStringMap(ds.Object,
110+
"spec", "template", "metadata", "annotations")
111+
Expect(err).NotTo(HaveOccurred())
112+
Expect(found).To(BeTrue())
113+
Expect(annotations[consts.ConfigHashAnnotation]).To(Equal(configHash))
114+
})
115+
116+
It("should preserve existing annotations", func() {
117+
ds := createTestDaemonSet("test-ds")
118+
// Add existing annotation
119+
err := unstructured.SetNestedStringMap(ds.Object,
120+
map[string]string{"existing": "annotation"},
121+
"spec", "template", "metadata", "annotations")
122+
Expect(err).NotTo(HaveOccurred())
123+
124+
objs := []*unstructured.Unstructured{ds}
125+
configHash := "abc123hash"
126+
127+
err = SetConfigHashAnnotation(objs, configHash)
128+
Expect(err).NotTo(HaveOccurred())
129+
130+
annotations, found, err := unstructured.NestedStringMap(ds.Object,
131+
"spec", "template", "metadata", "annotations")
132+
Expect(err).NotTo(HaveOccurred())
133+
Expect(found).To(BeTrue())
134+
Expect(annotations["existing"]).To(Equal("annotation"))
135+
Expect(annotations[consts.ConfigHashAnnotation]).To(Equal(configHash))
136+
})
137+
138+
It("should not modify non-DaemonSet objects", func() {
139+
configMap := &unstructured.Unstructured{
140+
Object: map[string]interface{}{
141+
"apiVersion": "v1",
142+
"kind": "ConfigMap",
143+
"metadata": map[string]interface{}{
144+
"name": "test-cm",
145+
"namespace": "test-ns",
146+
},
147+
},
148+
}
149+
objs := []*unstructured.Unstructured{configMap}
150+
configHash := "abc123hash"
151+
152+
err := SetConfigHashAnnotation(objs, configHash)
153+
Expect(err).NotTo(HaveOccurred())
154+
155+
// ConfigMap should not have the annotation path
156+
_, found, _ := unstructured.NestedStringMap(configMap.Object,
157+
"spec", "template", "metadata", "annotations")
158+
Expect(found).To(BeFalse())
159+
})
160+
161+
It("should handle multiple DaemonSets", func() {
162+
ds1 := createTestDaemonSet("test-ds-1")
163+
ds2 := createTestDaemonSet("test-ds-2")
164+
objs := []*unstructured.Unstructured{ds1, ds2}
165+
configHash := "abc123hash"
166+
167+
err := SetConfigHashAnnotation(objs, configHash)
168+
Expect(err).NotTo(HaveOccurred())
169+
170+
for _, ds := range objs {
171+
annotations, found, err := unstructured.NestedStringMap(ds.Object,
172+
"spec", "template", "metadata", "annotations")
173+
Expect(err).NotTo(HaveOccurred())
174+
Expect(found).To(BeTrue())
175+
Expect(annotations[consts.ConfigHashAnnotation]).To(Equal(configHash))
176+
}
177+
})
178+
})
179+
})
180+
181+
func createTestDaemonSet(name string) *unstructured.Unstructured {
182+
return &unstructured.Unstructured{
183+
Object: map[string]interface{}{
184+
"apiVersion": "apps/v1",
185+
"kind": "DaemonSet",
186+
"metadata": map[string]interface{}{
187+
"name": name,
188+
"namespace": "test-ns",
189+
},
190+
"spec": map[string]interface{}{
191+
"selector": map[string]interface{}{
192+
"matchLabels": map[string]interface{}{
193+
"app": name,
194+
},
195+
},
196+
"template": map[string]interface{}{
197+
"metadata": map[string]interface{}{
198+
"labels": map[string]interface{}{
199+
"app": name,
200+
},
201+
},
202+
"spec": map[string]interface{}{
203+
"containers": []interface{}{
204+
map[string]interface{}{
205+
"name": "test-container",
206+
"image": "test-image",
207+
},
208+
},
209+
},
210+
},
211+
},
212+
},
213+
}
214+
}

pkg/state/state_sriov_dp.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ func (s *stateSriovDp) GetManifestObjects(
149149
if clusterInfo == nil {
150150
return nil, errors.New("clusterInfo provider required")
151151
}
152+
152153
renderData := &sriovDpManifestRenderData{
153154
CrSpec: cr.Spec.SriovDevicePlugin,
154155
Tolerations: cr.Spec.Tolerations,
@@ -166,6 +167,14 @@ func (s *stateSriovDp) GetManifestObjects(
166167
if err != nil {
167168
return nil, errors.Wrap(err, "failed to render objects")
168169
}
170+
171+
if cr.Spec.SriovDevicePlugin.Config != nil {
172+
configHash := utils.GetStringHash(*cr.Spec.SriovDevicePlugin.Config)
173+
if err := SetConfigHashAnnotation(objs, configHash); err != nil {
174+
return nil, errors.Wrap(err, "failed to set config hash annotation")
175+
}
176+
}
177+
169178
reqLogger.V(consts.LogLevelDebug).Info("Rendered", "objects:", objs)
170179
return objs, nil
171180
}

pkg/utils/utils.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ limitations under the License.
1818
package utils
1919

2020
import (
21+
"crypto/sha256"
22+
"encoding/base64"
2123
"fmt"
2224
"os"
2325
"path/filepath"
@@ -91,3 +93,9 @@ func GetCniNetworkDirectory(staticInfo staticconfig.Provider, _ clustertype.Prov
9193
}
9294
return consts.DefaultCniNetworkDirectory
9395
}
96+
97+
// GetStringHash returns a base64-encoded SHA256 hash of the input string.
98+
func GetStringHash(data string) string {
99+
hash := sha256.Sum256([]byte(data))
100+
return base64.StdEncoding.EncodeToString(hash[:])
101+
}

pkg/utils/utils_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,33 @@ var _ = Describe("Utils tests", func() {
8383
Expect(result).To(Equal(consts.DefaultCniNetworkDirectory))
8484
})
8585
})
86+
87+
Context("Testing GetStringHash", func() {
88+
It("Should return consistent hash for same input", func() {
89+
input := `{"resourceName": "rdma_shared_device_a"}`
90+
hash1 := GetStringHash(input)
91+
hash2 := GetStringHash(input)
92+
Expect(hash1).To(Equal(hash2))
93+
})
94+
95+
It("Should return different hash for different input", func() {
96+
input1 := `{"resourceName": "rdma_shared_device_a"}`
97+
input2 := `{"resourceName": "rdma_shared_device_b"}`
98+
hash1 := GetStringHash(input1)
99+
hash2 := GetStringHash(input2)
100+
Expect(hash1).NotTo(Equal(hash2))
101+
})
102+
103+
It("Should return non-empty hash for empty string", func() {
104+
hash := GetStringHash("")
105+
Expect(hash).NotTo(BeEmpty())
106+
})
107+
108+
It("Should return valid base64 encoded string", func() {
109+
input := `{"config": "test"}`
110+
hash := GetStringHash(input)
111+
// Base64 encoded SHA256 hash should be 44 characters
112+
Expect(len(hash)).To(Equal(44))
113+
})
114+
})
86115
})

0 commit comments

Comments
 (0)