Skip to content

Commit 60432e0

Browse files
committed
RDMA subsystem is implemented via ib_core module config.
1 parent 34dcef5 commit 60432e0

21 files changed

+357
-104
lines changed

api/v1/sriovnetworknodestate_types.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
type SriovNetworkNodeStateSpec struct {
2828
Interfaces Interfaces `json:"interfaces,omitempty"`
2929
Bridges Bridges `json:"bridges,omitempty"`
30+
System System `json:"system,omitempty"`
3031
}
3132

3233
type Interfaces []Interface
@@ -114,10 +115,17 @@ type OVSUplinkConfigExt struct {
114115
Interface OVSInterfaceConfig `json:"interface,omitempty"`
115116
}
116117

118+
// System groups the settings of a node state that are not tied to a single
// interface or bridge. At present it only carries the RDMA subsystem mode.
type System struct {
	// +kubebuilder:validation:Enum=shared;exclusive
	// RDMA subsystem. Allowed value "shared", "exclusive".
	RdmaMode string `json:"rdmaMode,omitempty"`
}
123+
117124
// SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState
118125
type SriovNetworkNodeStateStatus struct {
119126
Interfaces InterfaceExts `json:"interfaces,omitempty"`
120127
Bridges Bridges `json:"bridges,omitempty"`
128+
System System `json:"system,omitempty"`
121129
SyncStatus string `json:"syncStatus,omitempty"`
122130
LastSyncError string `json:"lastSyncError,omitempty"`
123131
}

api/v1/sriovnetworkpoolconfig_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct {
2121
// Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards,
2222
// even if maxUnavailable is greater than one.
2323
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
24+
25+
// +kubebuilder:validation:Enum=shared;exclusive
26+
// RDMA subsystem. Allowed value "shared", "exclusive".
27+
RdmaMode string `json:"rdmaMode,omitempty"`
2428
}
2529

2630
type OvsHardwareOffloadConfig struct {

api/v1/zz_generated.deepcopy.go

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,15 @@ spec:
174174
- pciAddress
175175
type: object
176176
type: array
177+
system:
178+
properties:
179+
rdmaMode:
180+
description: RDMA subsystem. Allowed value "shared", "exclusive".
181+
enum:
182+
- shared
183+
- exclusive
184+
type: string
185+
type: object
177186
type: object
178187
status:
179188
description: SriovNetworkNodeStateStatus defines the observed state of
@@ -335,6 +344,15 @@ spec:
335344
type: string
336345
syncStatus:
337346
type: string
347+
system:
348+
properties:
349+
rdmaMode:
350+
description: RDMA subsystem. Allowed value "shared", "exclusive".
351+
enum:
352+
- shared
353+
- exclusive
354+
type: string
355+
type: object
338356
type: object
339357
type: object
340358
served: true

config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ spec:
111111
Name is the name of MachineConfigPool to be enabled with OVS hardware offload
112112
type: string
113113
type: object
114+
rdmaMode:
115+
description: RDMA subsystem. Allowed value "shared", "exclusive".
116+
enum:
117+
- shared
118+
- exclusive
119+
type: string
114120
type: object
115121
status:
116122
description: SriovNetworkPoolConfigStatus defines the observed state of

controllers/drain_controller.go

Lines changed: 1 addition & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,8 @@ import (
2424

2525
corev1 "k8s.io/api/core/v1"
2626
"k8s.io/apimachinery/pkg/api/errors"
27-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28-
"k8s.io/apimachinery/pkg/labels"
2927
"k8s.io/apimachinery/pkg/runtime"
3028
"k8s.io/apimachinery/pkg/types"
31-
"k8s.io/apimachinery/pkg/util/intstr"
3229
"k8s.io/client-go/tools/record"
3330
"k8s.io/client-go/util/workqueue"
3431
ctrl "sigs.k8s.io/controller-runtime"
@@ -48,13 +45,6 @@ import (
4845
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
4946
)
5047

51-
var (
52-
oneNode = intstr.FromInt32(1)
53-
defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{
54-
MaxUnavailable: &oneNode,
55-
NodeSelector: &metav1.LabelSelector{}}}
56-
)
57-
5848
type DrainReconcile struct {
5949
client.Client
6050
Scheme *runtime.Scheme
@@ -346,94 +336,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (
346336
}
347337

348338
func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
349-
logger := log.FromContext(ctx)
350-
logger.Info("findNodePoolConfig():")
351-
// get all the sriov network pool configs
352-
npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
353-
err := dr.List(ctx, npcl)
354-
if err != nil {
355-
logger.Error(err, "failed to list sriovNetworkPoolConfig")
356-
return nil, nil, err
357-
}
358-
359-
selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
360-
nodesInPools := map[string]interface{}{}
361-
362-
for _, npc := range npcl.Items {
363-
// we skip hw offload objects
364-
if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
365-
continue
366-
}
367-
368-
if npc.Spec.NodeSelector == nil {
369-
npc.Spec.NodeSelector = &metav1.LabelSelector{}
370-
}
371-
372-
selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
373-
if err != nil {
374-
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
375-
return nil, nil, err
376-
}
377-
378-
if selector.Matches(labels.Set(node.Labels)) {
379-
selectedNpcl = append(selectedNpcl, npc.DeepCopy())
380-
}
381-
382-
nodeList := &corev1.NodeList{}
383-
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
384-
if err != nil {
385-
logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
386-
"machineConfigPoolName", npc,
387-
"nodeSelector", npc.Spec.NodeSelector)
388-
return nil, nil, err
389-
}
390-
391-
for _, nodeName := range nodeList.Items {
392-
nodesInPools[nodeName.Name] = nil
393-
}
394-
}
395-
396-
if len(selectedNpcl) > 1 {
397-
// don't allow the node to be part of multiple pools
398-
err = fmt.Errorf("node is part of more then one pool")
399-
logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
400-
return nil, nil, err
401-
} else if len(selectedNpcl) == 1 {
402-
// found one pool for our node
403-
logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
404-
selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
405-
if err != nil {
406-
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
407-
return nil, nil, err
408-
}
409-
410-
// list all the nodes that are also part of this pool and return them
411-
nodeList := &corev1.NodeList{}
412-
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
413-
if err != nil {
414-
logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector)
415-
return nil, nil, err
416-
}
417-
418-
return selectedNpcl[0], nodeList.Items, nil
419-
} else {
420-
// in this case we get all the nodes and remove the ones that already part of any pool
421-
logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl)
422-
nodeList := &corev1.NodeList{}
423-
err = dr.List(ctx, nodeList)
424-
if err != nil {
425-
logger.Error(err, "failed to list all the nodes")
426-
return nil, nil, err
427-
}
428-
429-
defaultNodeLists := []corev1.Node{}
430-
for _, nodeObj := range nodeList.Items {
431-
if _, exist := nodesInPools[nodeObj.Name]; !exist {
432-
defaultNodeLists = append(defaultNodeLists, nodeObj)
433-
}
434-
}
435-
return defaultNpcl, defaultNodeLists, nil
436-
}
339+
return findNodePoolConfig(ctx, node, dr.Client)
437340
}
438341

439342
// SetupWithManager sets up the controller with the Manager.

controllers/helper.go

Lines changed: 105 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,12 @@ import (
3030
corev1 "k8s.io/api/core/v1"
3131
"k8s.io/apimachinery/pkg/api/equality"
3232
"k8s.io/apimachinery/pkg/api/errors"
33+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3334
uns "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
35+
"k8s.io/apimachinery/pkg/labels"
3436
"k8s.io/apimachinery/pkg/runtime"
3537
"k8s.io/apimachinery/pkg/types"
38+
"k8s.io/apimachinery/pkg/util/intstr"
3639
kscheme "k8s.io/client-go/kubernetes/scheme"
3740
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
3841
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
@@ -47,10 +50,17 @@ import (
4750
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
4851
)
4952

50-
var webhooks = map[string](string){
51-
constants.InjectorWebHookName: constants.InjectorWebHookPath,
52-
constants.OperatorWebHookName: constants.OperatorWebHookPath,
53-
}
53+
var (
54+
webhooks = map[string](string){
55+
constants.InjectorWebHookName: constants.InjectorWebHookPath,
56+
constants.OperatorWebHookName: constants.OperatorWebHookPath,
57+
}
58+
oneNode = intstr.FromInt32(1)
59+
defaultPoolConfig = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{
60+
MaxUnavailable: &oneNode,
61+
NodeSelector: &metav1.LabelSelector{},
62+
RdmaMode: ""}}
63+
)
5464

5565
const (
5666
clusterRoleResourceName = "ClusterRole"
@@ -397,3 +407,94 @@ func updateDaemonsetNodeSelector(obj *uns.Unstructured, nodeSelector map[string]
397407
}
398408
return nil
399409
}
410+
411+
func findNodePoolConfig(ctx context.Context, node *corev1.Node, c k8sclient.Client) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
412+
logger := log.FromContext(ctx)
413+
logger.Info("FindNodePoolConfig():")
414+
// get all the sriov network pool configs
415+
npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
416+
err := c.List(ctx, npcl)
417+
if err != nil {
418+
logger.Error(err, "failed to list sriovNetworkPoolConfig")
419+
return nil, nil, err
420+
}
421+
422+
selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
423+
nodesInPools := map[string]interface{}{}
424+
425+
for _, npc := range npcl.Items {
426+
// we skip hw offload objects
427+
if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
428+
continue
429+
}
430+
431+
if npc.Spec.NodeSelector == nil {
432+
npc.Spec.NodeSelector = &metav1.LabelSelector{}
433+
}
434+
435+
selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
436+
if err != nil {
437+
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
438+
return nil, nil, err
439+
}
440+
441+
if selector.Matches(labels.Set(node.Labels)) {
442+
selectedNpcl = append(selectedNpcl, npc.DeepCopy())
443+
}
444+
445+
nodeList := &corev1.NodeList{}
446+
err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector})
447+
if err != nil {
448+
logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
449+
"machineConfigPoolName", npc,
450+
"nodeSelector", npc.Spec.NodeSelector)
451+
return nil, nil, err
452+
}
453+
454+
for _, nodeName := range nodeList.Items {
455+
nodesInPools[nodeName.Name] = nil
456+
}
457+
}
458+
459+
if len(selectedNpcl) > 1 {
460+
// don't allow the node to be part of multiple pools
461+
err = fmt.Errorf("node is part of more then one pool")
462+
logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
463+
return nil, nil, err
464+
} else if len(selectedNpcl) == 1 {
465+
// found one pool for our node
466+
logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
467+
selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
468+
if err != nil {
469+
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
470+
return nil, nil, err
471+
}
472+
473+
// list all the nodes that are also part of this pool and return them
474+
nodeList := &corev1.NodeList{}
475+
err = c.List(ctx, nodeList, &k8sclient.ListOptions{LabelSelector: selector})
476+
if err != nil {
477+
logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector)
478+
return nil, nil, err
479+
}
480+
481+
return selectedNpcl[0], nodeList.Items, nil
482+
} else {
483+
// in this case we get all the nodes and remove the ones that already part of any pool
484+
logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultPoolConfig)
485+
nodeList := &corev1.NodeList{}
486+
err = c.List(ctx, nodeList)
487+
if err != nil {
488+
logger.Error(err, "failed to list all the nodes")
489+
return nil, nil, err
490+
}
491+
492+
defaultNodeLists := []corev1.Node{}
493+
for _, nodeObj := range nodeList.Items {
494+
if _, exist := nodesInPools[nodeObj.Name]; !exist {
495+
defaultNodeLists = append(defaultNodeLists, nodeObj)
496+
}
497+
}
498+
return defaultPoolConfig, defaultNodeLists, nil
499+
}
500+
}

controllers/sriovnetworknodepolicy_controller.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,13 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
272272
ns.Name = node.Name
273273
ns.Namespace = vars.Namespace
274274
j, _ := json.Marshal(ns)
275+
netPoolConfig, _, err := findNodePoolConfig(ctx, &node, r.Client)
276+
if err != nil {
277+
log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node")
278+
}
279+
if netPoolConfig != nil {
280+
ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode
281+
}
275282
logger.V(2).Info("SriovNetworkNodeState CR", "content", j)
276283
if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil {
277284
logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name)

0 commit comments

Comments
 (0)