Commit a3e8a58

This commit introduces a redesign of how the operator resets the device plugin:

* use a general nodeSelector, to avoid updating the DaemonSet YAML
* remove the config-daemon's pod-removal logic (better security)
* make the operator responsible for resetting the device plugin, via annotations
* mark the node as cordoned BEFORE removing the device plugin (without a drain), to avoid scheduling new pods until the device plugin is back up

Signed-off-by: Sebastian Sch <[email protected]>
1 parent ee40683 commit a3e8a58
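
To make the first bullet concrete: the device-plugin DaemonSet now ships with one static nodeSelector that includes a "device plugin enabled" node label, so the operator adds or removes the plugin pod on a node by flipping that label rather than by rewriting the DaemonSet spec. Below is a minimal, self-contained sketch of that matching behaviour; the literal label key and values are placeholders for illustration, the real ones live in the operator's constants package (constants.SriovDevicePluginEnabledLabel and its enabled/disabled values).

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// nodeSelector carried by the device-plugin DaemonSet (placeholder key/value)
	dsSelector := labels.SelectorFromSet(labels.Set{
		"node-role.kubernetes.io/worker":          "",
		"kubernetes.io/os":                        "linux",
		"sriovnetwork.openshift.io/device-plugin": "Enabled",
	})

	node := labels.Set{
		"node-role.kubernetes.io/worker":          "",
		"kubernetes.io/os":                        "linux",
		"sriovnetwork.openshift.io/device-plugin": "Enabled",
	}
	fmt.Println(dsSelector.Matches(node)) // true: the plugin pod is scheduled here

	// the drain controller flips the label to Disabled...
	node["sriovnetwork.openshift.io/device-plugin"] = "Disabled"
	// ...and the DaemonSet controller removes the plugin pod from the node
	fmt.Println(dsSelector.Matches(node)) // false
}

Cordoning the node before the flip (the last bullet) closes the window where a workload pod could land on the node while the device plugin is absent.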

14 files changed: +268 additions, −582 deletions

controllers/drain_controller.go

Lines changed: 148 additions & 1 deletion
@@ -172,6 +172,29 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 		return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
 	}

+	// check that the device plugin exited and enable it again,
+	// but only if we have something in the node state spec
+	if len(nodeNetworkState.Spec.Interfaces) > 0 {
+		completed, err = dr.enableSriovDevicePlugin(ctx, node)
+		if err != nil {
+			reqLogger.Error(err, "failed to enable SriovDevicePlugin")
+			dr.recorder.Event(nodeNetworkState,
+				corev1.EventTypeWarning,
+				"DrainController",
+				"failed to enable SriovDevicePlugin")
+			return ctrl.Result{}, err
+		}
+
+		if !completed {
+			reqLogger.Info("sriov device plugin enable was not completed")
+			dr.recorder.Event(nodeNetworkState,
+				corev1.EventTypeWarning,
+				"DrainController",
+				"sriov device plugin enable was not completed")
+			return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
+		}
+	}
+
 	// move the node state back to idle
 	err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client)
 	if err != nil {
@@ -209,7 +232,7 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 		}
 	}

-	// class the drain function that will also call drain to other platform providers like openshift
+	// call the drain function that will also call drain to other platform providers like openshift
 	drained, err := dr.drainer.DrainNode(ctx, node, nodeDrainAnnotation == constants.RebootRequired)
 	if err != nil {
 		reqLogger.Error(err, "error trying to drain the node")
@@ -230,6 +253,17 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 			return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
 		}

+		reqLogger.Info("remove Device plugin from node")
+		err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelDisabled, dr.Client)
+		if err != nil {
+			log.Log.Error(err, "failed to label node for device plugin label",
+				"labelKey",
+				constants.SriovDevicePluginEnabledLabel,
+				"labelValue",
+				constants.SriovDevicePluginEnabledLabelDisabled)
+			return reconcile.Result{}, err
+		}
+
 		// if we manage to drain we label the node state with drain completed and finish
 		err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
 		if err != nil {
@@ -243,6 +277,60 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 			"DrainController",
 			"node drain completed")
 		return ctrl.Result{}, nil
+	} else if nodeDrainAnnotation == constants.DevicePluginResetRequired {
+		// nothing to do here, we need to wait for the node to move back to idle
+		if nodeStateDrainAnnotationCurrent == constants.DrainComplete {
+			reqLogger.Info("node requested a drain and nodeState is on drain completed, nothing to do")
+			return ctrl.Result{}, nil
+		}
+
+		// if we are in the idle state we move it to draining
+		if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
+			err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client)
+			if err != nil {
+				reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining)
+				return ctrl.Result{}, err
+			}
+			return ctrl.Result{}, nil
+		}
+
+		// This covers the case where we only need to reset the device plugin.
+		// For that we cordon the node, so no new pods get allocated
+		// to the node while we remove the device plugin.
+		err = dr.drainer.RunCordonOrUncordon(ctx, node, true)
+		if err != nil {
+			log.Log.Error(err, "failed to cordon node")
+			return reconcile.Result{}, err
+		}
+
+		// We switch the sriov label to disabled and mark the drain as completed.
+		// There is no need to wait for the device plugin to exit here, as we cordoned the node,
+		// and we want the config-daemon to start the configuration in parallel with the kube-controller removing the pod.
+		// We check that the device plugin was removed when the config-daemon moves its desired state to idle.
+		reqLogger.Info("disable Device plugin from node")
+		err = utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelDisabled, dr.Client)
+		if err != nil {
+			log.Log.Error(err, "failed to label node for device plugin label",
+				"labelKey",
+				constants.SriovDevicePluginEnabledLabel,
+				"labelValue",
+				constants.SriovDevicePluginEnabledLabelDisabled)
+			return reconcile.Result{}, err
+		}
+
+		// if we manage to cordon we label the node state with drain completed and finish
+		err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
+		if err != nil {
+			reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete)
+			return ctrl.Result{}, err
+		}
+
+		reqLogger.Info("node cordoned successfully and device plugin removed")
+		dr.recorder.Event(nodeNetworkState,
+			corev1.EventTypeWarning,
+			"DrainController",
+			"node cordoned and device plugin removed")
+		return ctrl.Result{}, nil
 	}

 	reqLogger.Error(nil, "unexpected node drain annotation")
@@ -436,6 +524,65 @@ func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.N
 	}
 }

+// enableSriovDevicePlugin changes the device plugin label on the requested node to enabled.
+// If a device plugin pod is still running on the node it returns false.
+func (dr *DrainReconcile) enableSriovDevicePlugin(ctx context.Context, node *corev1.Node) (bool, error) {
+	logger := log.FromContext(ctx)
+	logger.Info("enableSriovDevicePlugin():")
+
+	// check if the device plugin is still terminating, but only if the node's device plugin label is disabled
+	if node.Labels[constants.SriovDevicePluginEnabledLabel] == constants.SriovDevicePluginEnabledLabelDisabled {
+		pods, err := dr.getDevicePluginPodsOnNode(node.Name)
+		if err != nil {
+			logger.Error(err, "failed to list device plugin pods running on node")
+			return false, err
+		}
+
+		if len(pods.Items) != 0 {
+			log.Log.V(2).Info("device plugin pod still terminating on node")
+			return false, nil
+		}
+	}
+
+	logger.Info("enable Device plugin from node")
+	err := utils.LabelNode(ctx, node.Name, constants.SriovDevicePluginEnabledLabel, constants.SriovDevicePluginEnabledLabelEnabled, dr.Client)
+	if err != nil {
+		log.Log.Error(err, "failed to label node for device plugin label",
+			"labelKey",
+			constants.SriovDevicePluginEnabledLabel,
+			"labelValue",
+			constants.SriovDevicePluginEnabledLabelEnabled)
+		return false, err
+	}
+
+	// check if the device plugin pod is running on the node
+	pods, err := dr.getDevicePluginPodsOnNode(node.Name)
+	if err != nil {
+		logger.Error(err, "failed to list device plugin pods running on node")
+		return false, err
+	}
+
+	if len(pods.Items) == 1 && pods.Items[0].Status.Phase == corev1.PodRunning {
+		logger.Info("Device plugin pod running on node")
+		return true, nil
+	}
+
+	logger.V(2).Info("Device plugin pod still not running on node")
+	return false, nil
+}
+
+func (dr *DrainReconcile) getDevicePluginPodsOnNode(nodeName string) (*corev1.PodList, error) {
+	pods := &corev1.PodList{}
+	err := dr.List(context.Background(), pods, &client.ListOptions{
+		Raw: &metav1.ListOptions{
+			LabelSelector:   "app=sriov-device-plugin",
+			FieldSelector:   "spec.nodeName=" + nodeName,
+			ResourceVersion: "0"},
+	})
+
+	return pods, err
+}
+
 // SetupWithManager sets up the controller with the Manager.
 func (dr *DrainReconcile) SetupWithManager(mgr ctrl.Manager) error {
 	createUpdateEnqueue := handler.Funcs{
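
The getDevicePluginPodsOnNode helper above filters by the "app=sriov-device-plugin" label, narrows to one node with a spec.nodeName field selector, and passes ResourceVersion "0", which lets the API server answer from its watch cache instead of performing a quorum read. For reference, a rough client-go equivalent of the same query (the in-cluster config and node name here are illustrative assumptions):

package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

func devicePluginPodsOnNode(ctx context.Context, cs kubernetes.Interface, nodeName string) error {
	// same selectors as getDevicePluginPodsOnNode; listing with an empty
	// namespace searches all namespaces
	pods, err := cs.CoreV1().Pods("").List(ctx, metav1.ListOptions{
		LabelSelector:   "app=sriov-device-plugin",
		FieldSelector:   "spec.nodeName=" + nodeName,
		ResourceVersion: "0",
	})
	if err != nil {
		return err
	}
	for _, p := range pods.Items {
		fmt.Printf("%s/%s: %s\n", p.Namespace, p.Name, p.Status.Phase)
	}
	return nil
}

func main() {
	cfg, err := rest.InClusterConfig() // assumes the code runs inside the cluster
	if err != nil {
		panic(err)
	}
	cs := kubernetes.NewForConfigOrDie(cfg)
	if err := devicePluginPodsOnNode(context.Background(), cs, "worker-0"); err != nil {
		panic(err)
	}
}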

controllers/helper.go

Lines changed: 16 additions & 90 deletions
@@ -22,12 +22,10 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
-	"sort"
 	"strings"

 	errs "github.com/pkg/errors"
 	appsv1 "k8s.io/api/apps/v1"
-	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/equality"
 	"k8s.io/apimachinery/pkg/api/errors"
 	uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -152,29 +150,25 @@ func formatJSON(str string) (string, error) {
 	return prettyJSON.String(), nil
 }

+// GetDefaultNodeSelector returns a nodeSelector matching worker nodes with a linux os
 func GetDefaultNodeSelector() map[string]string {
 	return map[string]string{"node-role.kubernetes.io/worker": "",
 		"kubernetes.io/os": "linux"}
 }

-// hasNoValidPolicy returns true if no SriovNetworkNodePolicy
-// or only the (deprecated) "default" policy is present
-func hasNoValidPolicy(pl []sriovnetworkv1.SriovNetworkNodePolicy) bool {
-	switch len(pl) {
-	case 0:
-		return true
-	case 1:
-		return pl[0].Name == constants.DefaultPolicyName
-	default:
-		return false
-	}
+// GetDefaultNodeSelectorForDevicePlugin returns a nodeSelector matching worker nodes
+// with a linux os and the sriov device plugin enabled
+func GetDefaultNodeSelectorForDevicePlugin() map[string]string {
+	return map[string]string{
+		"node-role.kubernetes.io/worker":        "",
+		"kubernetes.io/os":                      "linux",
+		constants.SriovDevicePluginEnabledLabel: constants.SriovDevicePluginEnabledLabelEnabled}
 }

 func syncPluginDaemonObjs(ctx context.Context,
 	client k8sclient.Client,
 	scheme *runtime.Scheme,
-	dc *sriovnetworkv1.SriovOperatorConfig,
-	pl *sriovnetworkv1.SriovNetworkNodePolicyList) error {
+	dc *sriovnetworkv1.SriovOperatorConfig) error {
 	logger := log.Log.WithName("syncPluginDaemonObjs")
 	logger.V(1).Info("Start to sync sriov daemons objects")

@@ -185,24 +179,14 @@ func syncPluginDaemonObjs(ctx context.Context,
 	data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION")
 	data.Data["ResourcePrefix"] = vars.ResourcePrefix
 	data.Data["ImagePullSecrets"] = GetImagePullSecrets()
-	data.Data["NodeSelectorField"] = GetDefaultNodeSelector()
+	data.Data["NodeSelectorField"] = GetDefaultNodeSelectorForDevicePlugin()
 	data.Data["UseCDI"] = dc.Spec.UseCDI
 	objs, err := renderDsForCR(constants.PluginPath, &data)
 	if err != nil {
 		logger.Error(err, "Fail to render SR-IoV manifests")
 		return err
 	}

-	if hasNoValidPolicy(pl.Items) {
-		for _, obj := range objs {
-			err := deleteK8sResource(ctx, client, obj)
-			if err != nil {
-				return err
-			}
-		}
-		return nil
-	}
-
 	// Sync DaemonSets
 	for _, obj := range objs {
 		if obj.GetKind() == constants.DaemonSet && len(dc.Spec.ConfigDaemonNodeSelector) > 0 {
@@ -214,13 +198,15 @@ func syncPluginDaemonObjs(ctx context.Context,
 				return err
 			}
 			ds.Spec.Template.Spec.NodeSelector = dc.Spec.ConfigDaemonNodeSelector
+			// add the special node selector for the device plugin
+			ds.Spec.Template.Spec.NodeSelector[constants.SriovDevicePluginEnabledLabel] = constants.SriovDevicePluginEnabledLabelEnabled
 			err = scheme.Convert(ds, obj, nil)
 			if err != nil {
 				logger.Error(err, "Fail to convert to Unstructured")
 				return err
 			}
 		}
-		err = syncDsObject(ctx, client, scheme, dc, pl, obj)
+		err = syncDsObject(ctx, client, scheme, dc, obj)
 		if err != nil {
 			logger.Error(err, "Couldn't sync SR-IoV daemons objects")
 			return err
@@ -230,14 +216,7 @@ func syncPluginDaemonObjs(ctx context.Context,
 	return nil
 }

-func deleteK8sResource(ctx context.Context, client k8sclient.Client, in *uns.Unstructured) error {
-	if err := apply.DeleteObject(ctx, client, in); err != nil {
-		return fmt.Errorf("failed to delete object %v with err: %v", in, err)
-	}
-	return nil
-}
-
-func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, pl *sriovnetworkv1.SriovNetworkNodePolicyList, obj *uns.Unstructured) error {
+func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, obj *uns.Unstructured) error {
 	logger := log.Log.WithName("syncDsObject")
 	kind := obj.GetKind()
 	logger.V(1).Info("Start to sync Objects", "Kind", kind)
@@ -257,7 +236,7 @@ func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.
 			logger.Error(err, "Fail to convert to DaemonSet")
 			return err
 		}
-		err = syncDaemonSet(ctx, client, scheme, dc, pl, ds)
+		err = syncDaemonSet(ctx, client, scheme, dc, ds)
 		if err != nil {
 			logger.Error(err, "Fail to sync DaemonSet", "Namespace", ds.Namespace, "Name", ds.Name)
 			return err
@@ -266,54 +245,6 @@ func syncDsObject(ctx context.Context, client k8sclient.Client, scheme *runtime.
 	return nil
 }

-func setDsNodeAffinity(pl *sriovnetworkv1.SriovNetworkNodePolicyList, ds *appsv1.DaemonSet) error {
-	terms := nodeSelectorTermsForPolicyList(pl.Items)
-	if len(terms) > 0 {
-		ds.Spec.Template.Spec.Affinity = &corev1.Affinity{
-			NodeAffinity: &corev1.NodeAffinity{
-				RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
-					NodeSelectorTerms: terms,
-				},
-			},
-		}
-	}
-	return nil
-}
-
-func nodeSelectorTermsForPolicyList(policies []sriovnetworkv1.SriovNetworkNodePolicy) []corev1.NodeSelectorTerm {
-	terms := []corev1.NodeSelectorTerm{}
-	for _, p := range policies {
-		// Note(adrianc): default policy is deprecated and ignored.
-		if p.Name == constants.DefaultPolicyName {
-			continue
-		}
-
-		if len(p.Spec.NodeSelector) == 0 {
-			continue
-		}
-		expressions := []corev1.NodeSelectorRequirement{}
-		for k, v := range p.Spec.NodeSelector {
-			exp := corev1.NodeSelectorRequirement{
-				Operator: corev1.NodeSelectorOpIn,
-				Key:      k,
-				Values:   []string{v},
-			}
-			expressions = append(expressions, exp)
-		}
-		// sorting is needed to keep the daemon spec stable.
-		// the items are popped in a random order from the map
-		sort.Slice(expressions, func(i, j int) bool {
-			return expressions[i].Key < expressions[j].Key
-		})
-		nodeSelector := corev1.NodeSelectorTerm{
-			MatchExpressions: expressions,
-		}
-		terms = append(terms, nodeSelector)
-	}
-
-	return terms
-}
-
 // renderDsForCR returns a busybox pod with the same name/namespace as the cr
 func renderDsForCR(path string, data *render.RenderData) ([]*uns.Unstructured, error) {
 	logger := log.Log.WithName("renderDsForCR")
@@ -326,16 +257,11 @@ func renderDsForCR(path string, data *render.RenderData) ([]*uns.Unstructured, e
 	return objs, nil
 }

-func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, pl *sriovnetworkv1.SriovNetworkNodePolicyList, in *appsv1.DaemonSet) error {
+func syncDaemonSet(ctx context.Context, client k8sclient.Client, scheme *runtime.Scheme, dc *sriovnetworkv1.SriovOperatorConfig, in *appsv1.DaemonSet) error {
 	logger := log.Log.WithName("syncDaemonSet")
 	logger.V(1).Info("Start to sync DaemonSet", "Namespace", in.Namespace, "Name", in.Name)
 	var err error

-	if pl != nil {
-		if err = setDsNodeAffinity(pl, in); err != nil {
-			return err
-		}
-	}
 	if err = controllerutil.SetControllerReference(dc, in, scheme); err != nil {
 		return err
 	}
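
Both controllers flip the label through utils.LabelNode. A minimal sketch of what such a helper can look like with a controller-runtime client (an illustrative assumption, not the repository's actual implementation), using a merge patch so only the changed label is sent to the API server:

package nodeutil

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// labelNode sets key=value on the named node via a merge patch,
// leaving every other field of the object untouched.
func labelNode(ctx context.Context, c client.Client, nodeName, key, value string) error {
	node := &corev1.Node{}
	if err := c.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
		return err
	}
	patch := client.MergeFrom(node.DeepCopy())
	if node.Labels == nil {
		node.Labels = map[string]string{}
	}
	node.Labels[key] = value
	return c.Patch(ctx, node, patch)
}

Patching instead of updating avoids most resourceVersion conflicts when the drain controller and other agents touch the node object concurrently.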
