
Commit 146b706

This commit introduces a redesign of how the operator resets the device plugin:
* use a general nodeSelector to avoid updating the daemonset YAML
* remove the pod-deletion logic from the config-daemon (better security)
* make the operator responsible for resetting the device plugin via annotations
* mark the node as cordoned BEFORE removing the device plugin (without draining), so no new pods are scheduled until the device plugin is back up

Signed-off-by: Sebastian Sch <[email protected]>
1 parent 4bae6ce commit 146b706
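
The key ordering change is the last bullet above: the node is cordoned first, and only then is the device-plugin pod taken down, so the scheduler cannot place new SR-IOV workloads while the plugin is absent. Below is a minimal sketch of that ordering using controller-runtime directly; the label key and function name are illustrative assumptions, not code from this commit.

package drainsketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// devicePluginLabel is a placeholder for whatever node-selector label the
// device-plugin daemonset matches on; the real key lives in the operator.
const devicePluginLabel = "example.com/device-plugin"

// cordonThenStopDevicePlugin marks the node unschedulable first (no drain,
// no eviction) and only then removes the label that keeps the device-plugin
// pod scheduled, mirroring the ordering described in the commit message.
func cordonThenStopDevicePlugin(ctx context.Context, c client.Client, node *corev1.Node) error {
	// 1. Cordon: stop new pods from landing on the node while the device
	//    plugin is being recreated. Running pods are left untouched.
	patch := client.MergeFrom(node.DeepCopy())
	node.Spec.Unschedulable = true
	if err := c.Patch(ctx, node, patch); err != nil {
		return err
	}

	// 2. Remove the device-plugin node-selector label so the daemonset
	//    controller deletes the plugin pod from this node.
	patch = client.MergeFrom(node.DeepCopy())
	delete(node.Labels, devicePluginLabel)
	return c.Patch(ctx, node, patch)
}

Draining is deliberately skipped in this path: running workloads keep their VFs, and only new scheduling is blocked until the device plugin is back up.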

File tree

18 files changed: +758, -853 lines


controllers/drain_controller.go

Lines changed: 14 additions & 258 deletions
@@ -20,12 +20,10 @@ import (
 	"context"
 	"fmt"
 	"sync"
-	"time"

 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/intstr"
@@ -136,11 +134,11 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 	if nodeDrainAnnotation == constants.DrainIdle {
 		// this cover the case the node is on idle

-		// node request to be on idle and the currect state is idle
-		// we don't do anything
+		// node request to be on idle and the current state is idle
+		// in this case we check if vf configuration exist in the nodeState
+		// if not we remove the device plugin node selector label from the node
 		if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
-			reqLogger.Info("node and nodeState are on idle nothing todo")
-			return reconcile.Result{}, nil
+			return dr.handleNodeIdleNodeStateIdle(ctx, &reqLogger, node, nodeNetworkState)
 		}

 		// we have two options here:
@@ -151,98 +149,19 @@ func (dr *DrainReconcile) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
 		// doesn't need to drain anymore, so we can stop the drain
 		if nodeStateDrainAnnotationCurrent == constants.DrainComplete ||
 			nodeStateDrainAnnotationCurrent == constants.Draining {
-			completed, err := dr.drainer.CompleteDrainNode(ctx, node)
-			if err != nil {
-				reqLogger.Error(err, "failed to complete drain on node")
-				dr.recorder.Event(nodeNetworkState,
-					corev1.EventTypeWarning,
-					"DrainController",
-					"failed to drain node")
-				return ctrl.Result{}, err
-			}
-
-			// if we didn't manage to complete the un drain of the node we retry
-			if !completed {
-				reqLogger.Info("complete drain was not completed re queueing the request")
-				dr.recorder.Event(nodeNetworkState,
-					corev1.EventTypeWarning,
-					"DrainController",
-					"node complete drain was not completed")
-				// TODO: make this time configurable
-				return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
-			}
-
-			// move the node state back to idle
-			err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainIdle, dr.Client)
-			if err != nil {
-				reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainIdle)
-				return ctrl.Result{}, err
-			}
-
-			reqLogger.Info("completed the un drain for node")
-			dr.recorder.Event(nodeNetworkState,
-				corev1.EventTypeWarning,
-				"DrainController",
-				"node un drain completed")
-			return ctrl.Result{}, nil
-		}
-	} else if nodeDrainAnnotation == constants.DrainRequired || nodeDrainAnnotation == constants.RebootRequired {
-		// this cover the case a node request to drain or reboot
-
-		// nothing to do here we need to wait for the node to move back to idle
-		if nodeStateDrainAnnotationCurrent == constants.DrainComplete {
-			reqLogger.Info("node requested a drain and nodeState is on drain completed nothing todo")
-			return ctrl.Result{}, nil
-		}
-
-		// we need to start the drain, but first we need to check that we can drain the node
-		if nodeStateDrainAnnotationCurrent == constants.DrainIdle {
-			result, err := dr.tryDrainNode(ctx, node)
-			if err != nil {
-				reqLogger.Error(err, "failed to check if we can drain the node")
-				return ctrl.Result{}, err
-			}
-
-			// in case we need to wait because we just to the max number of draining nodes
-			if result != nil {
-				return *result, nil
-			}
-		}
-
-		// class the drain function that will also call drain to other platform providers like openshift
-		drained, err := dr.drainer.DrainNode(ctx, node, nodeDrainAnnotation == constants.RebootRequired)
-		if err != nil {
-			reqLogger.Error(err, "error trying to drain the node")
-			dr.recorder.Event(nodeNetworkState,
-				corev1.EventTypeWarning,
-				"DrainController",
-				"failed to drain node")
-			return reconcile.Result{}, err
-		}
-
-		// if we didn't manage to complete the drain of the node we retry
-		if !drained {
-			reqLogger.Info("the nodes was not drained re queueing the request")
-			dr.recorder.Event(nodeNetworkState,
-				corev1.EventTypeWarning,
-				"DrainController",
-				"node drain operation was not completed")
-			return reconcile.Result{RequeueAfter: 5 * time.Second}, nil
+			return dr.handleNodeIdleNodeStateDrainingOrCompleted(ctx, &reqLogger, node, nodeNetworkState)
 		}
+	}

-		// if we manage to drain we label the node state with drain completed and finish
-		err = utils.AnnotateObject(ctx, nodeNetworkState, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete, dr.Client)
-		if err != nil {
-			reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.DrainComplete)
-			return ctrl.Result{}, err
-		}
+	// this cover the case a node request to drain or reboot
+	if nodeDrainAnnotation == constants.DrainRequired ||
+		nodeDrainAnnotation == constants.RebootRequired {
+		return dr.handleNodeDrainOrReboot(ctx, &reqLogger, node, nodeNetworkState, nodeDrainAnnotation, nodeStateDrainAnnotationCurrent)
+	}

-		reqLogger.Info("node drained successfully")
-		dr.recorder.Event(nodeNetworkState,
-			corev1.EventTypeWarning,
-			"DrainController",
-			"node drain completed")
-		return ctrl.Result{}, nil
+	// this cover the case a node request to only reset the device plugin
+	if nodeDrainAnnotation == constants.DevicePluginResetRequired {
+		return dr.handleNodeDPReset(ctx, &reqLogger, node, nodeNetworkState, nodeStateDrainAnnotationCurrent)
 	}

 	reqLogger.Error(nil, "unexpected node drain annotation")
@@ -273,169 +192,6 @@ func (dr *DrainReconcile) ensureAnnotationExists(ctx context.Context, object cli
 	return value, nil
 }

-func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (*reconcile.Result, error) {
-	// configure logs
-	reqLogger := log.FromContext(ctx)
-	reqLogger.Info("checkForNodeDrain():")
-
-	//critical section we need to check if we can start the draining
-	dr.drainCheckMutex.Lock()
-	defer dr.drainCheckMutex.Unlock()
-
-	// find the relevant node pool
-	nodePool, nodeList, err := dr.findNodePoolConfig(ctx, node)
-	if err != nil {
-		reqLogger.Error(err, "failed to find the pool for the requested node")
-		return nil, err
-	}
-
-	// check how many nodes we can drain in parallel for the specific pool
-	maxUnv, err := nodePool.MaxUnavailable(len(nodeList))
-	if err != nil {
-		reqLogger.Error(err, "failed to calculate max unavailable")
-		return nil, err
-	}
-
-	current := 0
-	snns := &sriovnetworkv1.SriovNetworkNodeState{}
-
-	var currentSnns *sriovnetworkv1.SriovNetworkNodeState
-	for _, nodeObj := range nodeList {
-		err = dr.Get(ctx, client.ObjectKey{Name: nodeObj.GetName(), Namespace: vars.Namespace}, snns)
-		if err != nil {
-			if errors.IsNotFound(err) {
-				reqLogger.V(2).Info("node doesn't have a sriovNetworkNodePolicy")
-				continue
-			}
-			return nil, err
-		}
-
-		if snns.GetName() == node.GetName() {
-			currentSnns = snns.DeepCopy()
-		}
-
-		if utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.Draining) ||
-			utils.ObjectHasAnnotation(snns, constants.NodeStateDrainAnnotationCurrent, constants.DrainComplete) {
-			current++
-		}
-	}
-	reqLogger.Info("Max node allowed to be draining at the same time", "MaxParallelNodeConfiguration", maxUnv)
-	reqLogger.Info("Count of draining", "drainingNodes", current)
-
-	// if maxUnv is zero this means we drain all the nodes in parallel without a limit
-	if maxUnv == -1 {
-		reqLogger.Info("draining all the nodes in parallel")
-	} else if current >= maxUnv {
-		// the node requested to be drained, but we are at the limit so we re-enqueue the request
-		reqLogger.Info("MaxParallelNodeConfiguration limit reached for draining nodes re-enqueue the request")
-		// TODO: make this time configurable
-		return &reconcile.Result{RequeueAfter: 5 * time.Second}, nil
-	}
-
-	if currentSnns == nil {
-		return nil, fmt.Errorf("failed to find sriov network node state for requested node")
-	}
-
-	err = utils.AnnotateObject(ctx, currentSnns, constants.NodeStateDrainAnnotationCurrent, constants.Draining, dr.Client)
-	if err != nil {
-		reqLogger.Error(err, "failed to annotate node with annotation", "annotation", constants.Draining)
-		return nil, err
-	}
-
-	return nil, nil
-}
-
-func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
-	logger := log.FromContext(ctx)
-	logger.Info("findNodePoolConfig():")
-	// get all the sriov network pool configs
-	npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
-	err := dr.List(ctx, npcl)
-	if err != nil {
-		logger.Error(err, "failed to list sriovNetworkPoolConfig")
-		return nil, nil, err
-	}
-
-	selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
-	nodesInPools := map[string]interface{}{}
-
-	for _, npc := range npcl.Items {
-		// we skip hw offload objects
-		if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
-			continue
-		}
-
-		if npc.Spec.NodeSelector == nil {
-			npc.Spec.NodeSelector = &metav1.LabelSelector{}
-		}
-
-		selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
-		if err != nil {
-			logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
-			return nil, nil, err
-		}
-
-		if selector.Matches(labels.Set(node.Labels)) {
-			selectedNpcl = append(selectedNpcl, npc.DeepCopy())
-		}
-
-		nodeList := &corev1.NodeList{}
-		err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
-		if err != nil {
-			logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
-				"machineConfigPoolName", npc,
-				"nodeSelector", npc.Spec.NodeSelector)
-			return nil, nil, err
-		}
-
-		for _, nodeName := range nodeList.Items {
-			nodesInPools[nodeName.Name] = nil
-		}
-	}
-
-	if len(selectedNpcl) > 1 {
-		// don't allow the node to be part of multiple pools
-		err = fmt.Errorf("node is part of more then one pool")
-		logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
-		return nil, nil, err
-	} else if len(selectedNpcl) == 1 {
-		// found one pool for our node
-		logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
-		selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
-		if err != nil {
-			logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
-			return nil, nil, err
-		}
-
-		// list all the nodes that are also part of this pool and return them
-		nodeList := &corev1.NodeList{}
-		err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
-		if err != nil {
-			logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector)
-			return nil, nil, err
-		}
-
-		return selectedNpcl[0], nodeList.Items, nil
-	} else {
-		// in this case we get all the nodes and remove the ones that already part of any pool
-		logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl)
-		nodeList := &corev1.NodeList{}
-		err = dr.List(ctx, nodeList)
-		if err != nil {
-			logger.Error(err, "failed to list all the nodes")
-			return nil, nil, err
-		}
-
-		defaultNodeLists := []corev1.Node{}
-		for _, nodeObj := range nodeList.Items {
-			if _, exist := nodesInPools[nodeObj.Name]; !exist {
-				defaultNodeLists = append(defaultNodeLists, nodeObj)
-			}
-		}
-		return defaultNpcl, defaultNodeLists, nil
-	}
-}
-
 // SetupWithManager sets up the controller with the Manager.
 func (dr *DrainReconcile) SetupWithManager(mgr ctrl.Manager) error {
 	createUpdateEnqueue := handler.Funcs{
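
The new Reconcile branch only reads an annotation, so whichever component wants a device-plugin-only reset (per the commit message, the operator rather than the config-daemon) just writes that request onto the node and lets handleNodeDPReset react on the next reconcile. A rough caller-side sketch follows; the annotation key and value strings are placeholders standing in for the operator's real constants (the diff above only shows constants.DevicePluginResetRequired).

package drainsketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// Placeholder strings; the real annotation key and the
// DevicePluginResetRequired value live in the operator's constants package.
const (
	drainRequestAnnotation = "example.com/drain-request"
	devicePluginResetValue = "Device_Plugin_Reset_Required"
)

// requestDevicePluginReset annotates the node so the drain controller's
// handleNodeDPReset branch handles it instead of running a full drain.
func requestDevicePluginReset(ctx context.Context, c client.Client, node *corev1.Node) error {
	patch := client.MergeFrom(node.DeepCopy())
	if node.Annotations == nil {
		node.Annotations = map[string]string{}
	}
	node.Annotations[drainRequestAnnotation] = devicePluginResetValue
	return c.Patch(ctx, node, patch)
}

Because the request (on the node) and the current state (on the SriovNetworkNodeState) live in separate annotations, the controller can distinguish a fresh reset request from one it is already processing.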

0 commit comments
