Skip to content

Commit e0ae318

Browse files
committed
Implement RDMA subsystem mode change
Now it's possible to configure the RDMA subsystem mode using the SR-IOV Network Operator in systemd mode. We can't configure the RDMA subsystem in daemon mode because it must be done on the host before any network namespace is created.
1 parent ce1385f commit e0ae318

20 files changed

+304
-101
lines changed

api/v1/sriovnetworknodestate_types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
type SriovNetworkNodeStateSpec struct {
2828
Interfaces Interfaces `json:"interfaces,omitempty"`
2929
Bridges Bridges `json:"bridges,omitempty"`
30+
System System `json:"system,omitempty"`
3031
}
3132

3233
type Interfaces []Interface
@@ -114,10 +115,15 @@ type OVSUplinkConfigExt struct {
114115
Interface OVSInterfaceConfig `json:"interface,omitempty"`
115116
}
116117

118+
type System struct {
119+
RdmaMode string `json:"rdmaMode,omitempty"`
120+
}
121+
117122
// SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState
118123
type SriovNetworkNodeStateStatus struct {
119124
Interfaces InterfaceExts `json:"interfaces,omitempty"`
120125
Bridges Bridges `json:"bridges,omitempty"`
126+
System System `json:"system,omitempty"`
121127
SyncStatus string `json:"syncStatus,omitempty"`
122128
LastSyncError string `json:"lastSyncError,omitempty"`
123129
}

api/v1/sriovnetworkpoolconfig_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ type SriovNetworkPoolConfigSpec struct {
2121
// Drain will respect Pod Disruption Budgets (PDBs) such as etcd quorum guards,
2222
// even if maxUnavailable is greater than one.
2323
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
24+
25+
// +kubebuilder:validation:Enum=shared;exclusive
26+
// RDMA subsystem. Allowed value "shared", "exclusive".
27+
RdmaMode string `json:"rdmaMode,omitempty"`
2428
}
2529

2630
type OvsHardwareOffloadConfig struct {

api/v1/zz_generated.deepcopy.go

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/sriov-network-config-daemon/service.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,21 @@ func phasePre(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helpe
152152
hostHelpers.TryEnableTun()
153153
hostHelpers.TryEnableVhostNet()
154154

155+
if conf.Spec.System.RdmaMode != "" {
156+
rdmaSubsystem, err := hostHelpers.DiscoverRDMASubsystem()
157+
if err != nil {
158+
setupLog.Error(err, "failed to get RDMA subsystem mode")
159+
return fmt.Errorf("failed to get RDMA subsystem mode: %v", err)
160+
}
161+
if rdmaSubsystem != conf.Spec.System.RdmaMode {
162+
err = hostHelpers.SetRDMASubsystem(conf.Spec.System.RdmaMode)
163+
if err != nil {
164+
setupLog.Error(err, "failed to set RDMA subsystem mode")
165+
return fmt.Errorf("failed to set RDMA subsystem mode: %v", err)
166+
}
167+
}
168+
}
169+
155170
return callPlugin(setupLog, PhasePre, conf, hostHelpers)
156171
}
157172

config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ spec:
174174
- pciAddress
175175
type: object
176176
type: array
177+
system:
178+
properties:
179+
rdmaMode:
180+
type: string
181+
type: object
177182
type: object
178183
status:
179184
description: SriovNetworkNodeStateStatus defines the observed state of
@@ -335,6 +340,11 @@ spec:
335340
type: string
336341
syncStatus:
337342
type: string
343+
system:
344+
properties:
345+
rdmaMode:
346+
type: string
347+
type: object
338348
type: object
339349
type: object
340350
served: true

config/crd/bases/sriovnetwork.openshift.io_sriovnetworkpoolconfigs.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ spec:
111111
Name is the name of MachineConfigPool to be enabled with OVS hardware offload
112112
type: string
113113
type: object
114+
rdmaMode:
115+
description: RDMA subsystem. Allowed value "shared", "exclusive".
116+
enum:
117+
- shared
118+
- exclusive
119+
type: string
114120
type: object
115121
status:
116122
description: SriovNetworkPoolConfigStatus defines the observed state of

controllers/drain_controller.go

Lines changed: 1 addition & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,8 @@ import (
2424

2525
corev1 "k8s.io/api/core/v1"
2626
"k8s.io/apimachinery/pkg/api/errors"
27-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28-
"k8s.io/apimachinery/pkg/labels"
2927
"k8s.io/apimachinery/pkg/runtime"
3028
"k8s.io/apimachinery/pkg/types"
31-
"k8s.io/apimachinery/pkg/util/intstr"
3229
"k8s.io/client-go/tools/record"
3330
"k8s.io/client-go/util/workqueue"
3431
ctrl "sigs.k8s.io/controller-runtime"
@@ -48,13 +45,6 @@ import (
4845
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
4946
)
5047

51-
var (
52-
oneNode = intstr.FromInt32(1)
53-
defaultNpcl = &sriovnetworkv1.SriovNetworkPoolConfig{Spec: sriovnetworkv1.SriovNetworkPoolConfigSpec{
54-
MaxUnavailable: &oneNode,
55-
NodeSelector: &metav1.LabelSelector{}}}
56-
)
57-
5848
type DrainReconcile struct {
5949
client.Client
6050
Scheme *runtime.Scheme
@@ -346,94 +336,7 @@ func (dr *DrainReconcile) tryDrainNode(ctx context.Context, node *corev1.Node) (
346336
}
347337

348338
func (dr *DrainReconcile) findNodePoolConfig(ctx context.Context, node *corev1.Node) (*sriovnetworkv1.SriovNetworkPoolConfig, []corev1.Node, error) {
349-
logger := log.FromContext(ctx)
350-
logger.Info("findNodePoolConfig():")
351-
// get all the sriov network pool configs
352-
npcl := &sriovnetworkv1.SriovNetworkPoolConfigList{}
353-
err := dr.List(ctx, npcl)
354-
if err != nil {
355-
logger.Error(err, "failed to list sriovNetworkPoolConfig")
356-
return nil, nil, err
357-
}
358-
359-
selectedNpcl := []*sriovnetworkv1.SriovNetworkPoolConfig{}
360-
nodesInPools := map[string]interface{}{}
361-
362-
for _, npc := range npcl.Items {
363-
// we skip hw offload objects
364-
if npc.Spec.OvsHardwareOffloadConfig.Name != "" {
365-
continue
366-
}
367-
368-
if npc.Spec.NodeSelector == nil {
369-
npc.Spec.NodeSelector = &metav1.LabelSelector{}
370-
}
371-
372-
selector, err := metav1.LabelSelectorAsSelector(npc.Spec.NodeSelector)
373-
if err != nil {
374-
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", npc.Spec.NodeSelector)
375-
return nil, nil, err
376-
}
377-
378-
if selector.Matches(labels.Set(node.Labels)) {
379-
selectedNpcl = append(selectedNpcl, npc.DeepCopy())
380-
}
381-
382-
nodeList := &corev1.NodeList{}
383-
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
384-
if err != nil {
385-
logger.Error(err, "failed to list all the nodes matching the pool with label selector from nodeSelector",
386-
"machineConfigPoolName", npc,
387-
"nodeSelector", npc.Spec.NodeSelector)
388-
return nil, nil, err
389-
}
390-
391-
for _, nodeName := range nodeList.Items {
392-
nodesInPools[nodeName.Name] = nil
393-
}
394-
}
395-
396-
if len(selectedNpcl) > 1 {
397-
// don't allow the node to be part of multiple pools
398-
err = fmt.Errorf("node is part of more then one pool")
399-
logger.Error(err, "multiple pools founded for a specific node", "numberOfPools", len(selectedNpcl), "pools", selectedNpcl)
400-
return nil, nil, err
401-
} else if len(selectedNpcl) == 1 {
402-
// found one pool for our node
403-
logger.V(2).Info("found sriovNetworkPool", "pool", *selectedNpcl[0])
404-
selector, err := metav1.LabelSelectorAsSelector(selectedNpcl[0].Spec.NodeSelector)
405-
if err != nil {
406-
logger.Error(err, "failed to create label selector from nodeSelector", "nodeSelector", selectedNpcl[0].Spec.NodeSelector)
407-
return nil, nil, err
408-
}
409-
410-
// list all the nodes that are also part of this pool and return them
411-
nodeList := &corev1.NodeList{}
412-
err = dr.List(ctx, nodeList, &client.ListOptions{LabelSelector: selector})
413-
if err != nil {
414-
logger.Error(err, "failed to list nodes using with label selector", "labelSelector", selector)
415-
return nil, nil, err
416-
}
417-
418-
return selectedNpcl[0], nodeList.Items, nil
419-
} else {
420-
// in this case we get all the nodes and remove the ones that already part of any pool
421-
logger.V(1).Info("node doesn't belong to any pool, using default drain configuration with MaxUnavailable of one", "pool", *defaultNpcl)
422-
nodeList := &corev1.NodeList{}
423-
err = dr.List(ctx, nodeList)
424-
if err != nil {
425-
logger.Error(err, "failed to list all the nodes")
426-
return nil, nil, err
427-
}
428-
429-
defaultNodeLists := []corev1.Node{}
430-
for _, nodeObj := range nodeList.Items {
431-
if _, exist := nodesInPools[nodeObj.Name]; !exist {
432-
defaultNodeLists = append(defaultNodeLists, nodeObj)
433-
}
434-
}
435-
return defaultNpcl, defaultNodeLists, nil
436-
}
339+
return utils.FindNodePoolConfig(ctx, node, dr.Client)
437340
}
438341

439342
// SetupWithManager sets up the controller with the Manager.

controllers/sriovnetworknodepolicy_controller.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646
sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
4747
constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
4848
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
49+
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils"
4950
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
5051
)
5152

@@ -269,6 +270,13 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
269270
ns.Name = node.Name
270271
ns.Namespace = vars.Namespace
271272
j, _ := json.Marshal(ns)
273+
netPoolConfig, _, err := utils.FindNodePoolConfig(context.Background(), &node, r.Client)
274+
if err != nil {
275+
log.Log.Error(err, "nodeStateSyncHandler(): failed to get SriovNetworkPoolConfig for the current node")
276+
}
277+
if netPoolConfig != nil {
278+
ns.Spec.System.RdmaMode = netPoolConfig.Spec.RdmaMode
279+
}
272280
logger.V(2).Info("SriovNetworkNodeState CR", "content", j)
273281
if err := r.syncSriovNetworkNodeState(ctx, dc, npl, ns, &node); err != nil {
274282
logger.Error(err, "Fail to sync", "SriovNetworkNodeState", ns.Name)

controllers/sriovnetworkpoolconfig_controller.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,19 @@ func (r *SriovNetworkPoolConfigReconciler) Reconcile(ctx context.Context, req ct
7373
return reconcile.Result{}, err
7474
}
7575

76+
// RdmaMode could be set in systemd mode only
77+
if instance.Spec.RdmaMode != "" {
78+
operatorConfig := &sriovnetworkv1.SriovOperatorConfig{}
79+
err := r.Get(ctx, types.NamespacedName{Namespace: vars.Namespace, Name: constants.DefaultConfigName}, operatorConfig)
80+
if err != nil {
81+
logger.Error(err, "failed to list SriovOperatorConfig")
82+
return reconcile.Result{}, err
83+
}
84+
if operatorConfig.Spec.ConfigurationMode == sriovnetworkv1.DaemonConfigurationMode {
85+
logger.Info("rdmaSpec is ignored in 'daemon' configuration mode")
86+
}
87+
}
88+
7689
// we don't need a finalizer for pools that doesn't use the ovs hardware offload feature
7790
if instance.Spec.OvsHardwareOffloadConfig.Name == "" {
7891
return ctrl.Result{}, nil

deployment/sriov-network-operator-chart/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ spec:
174174
- pciAddress
175175
type: object
176176
type: array
177+
system:
178+
properties:
179+
rdmaMode:
180+
type: string
181+
type: object
177182
type: object
178183
status:
179184
description: SriovNetworkNodeStateStatus defines the observed state of
@@ -335,6 +340,11 @@ spec:
335340
type: string
336341
syncStatus:
337342
type: string
343+
system:
344+
properties:
345+
rdmaMode:
346+
type: string
347+
type: object
338348
type: object
339349
type: object
340350
served: true

0 commit comments

Comments (0)