Skip to content

Commit 2c8bd60

Browse files
Merge pull request #122 from almaslennikov/dms-server
feat: add dms package and use dms to get and set qos settings
2 parents 8aed91c + 2746b28 commit 2c8bd60

File tree

19 files changed

+1677
-122
lines changed

19 files changed

+1677
-122
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ COPY ./ ./
3939
#RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/maintenance-manager/main.go
4040
RUN --mount=type=cache,target=/go/pkg/mod/ GO_GCFLAGS=${GCFLAGS} make build-manager
4141

42-
FROM nvcr.io/nvidia/doca/doca:2.9.2-full-rt-host
42+
FROM nvcr.io/nvidia/doca/doca:3.0.0-full-rt-host
4343

4444
ARG TARGETARCH
4545
ENV MFT_VERSION=4.29.0-131

Dockerfile.nic-configuration-daemon

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ COPY ./ ./
2222
# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
2323
RUN --mount=type=cache,target=/go/pkg/mod/ GO_GCFLAGS=${GCFLAGS} make build-daemon
2424

25-
FROM nvcr.io/nvidia/doca/doca:2.9.2-full-rt-host
25+
FROM nvcr.io/nvidia/doca/doca:3.0.0-full-rt-host
2626

2727
ARG TARGETARCH
2828
ENV MFT_VERSION=4.29.0-131

cmd/nic-configuration-daemon/main.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ package main
1818

1919
import (
2020
"flag"
21+
"maps"
2122
"os"
23+
"slices"
2224

2325
maintenanceoperator "github.com/Mellanox/maintenance-operator/api/v1alpha1"
2426
"k8s.io/apimachinery/pkg/runtime"
@@ -34,6 +36,7 @@ import (
3436
"github.com/Mellanox/nic-configuration-operator/internal/controller"
3537
"github.com/Mellanox/nic-configuration-operator/pkg/configuration"
3638
"github.com/Mellanox/nic-configuration-operator/pkg/devicediscovery"
39+
"github.com/Mellanox/nic-configuration-operator/pkg/dms"
3740
"github.com/Mellanox/nic-configuration-operator/pkg/firmware"
3841
"github.com/Mellanox/nic-configuration-operator/pkg/helper"
3942
"github.com/Mellanox/nic-configuration-operator/pkg/host"
@@ -92,7 +95,29 @@ func main() {
9295
hostUtils := host.NewHostUtils()
9396
deviceDiscovery := devicediscovery.NewDeviceDiscovery(nodeName)
9497

95-
configurationManager := configuration.NewConfigurationManager(eventRecorder)
98+
// Initialize DMS manager
99+
dmsManager := dms.NewDMSManager()
100+
101+
// Start DMS instances for all discovered devices
102+
devices, err := deviceDiscovery.DiscoverNicDevices()
103+
if err != nil {
104+
log.Log.Error(err, "failed to discover NIC devices")
105+
os.Exit(1)
106+
}
107+
108+
if err := dmsManager.StartDMSInstances(slices.Collect(maps.Values(devices))); err != nil {
109+
log.Log.Error(err, "failed to start DMS instances")
110+
os.Exit(1)
111+
}
112+
113+
// Ensure DMS instances are stopped when the program exits
114+
defer func() {
115+
if err := dmsManager.StopAllDMSInstances(); err != nil {
116+
log.Log.Error(err, "failed to stop DMS instances")
117+
}
118+
}()
119+
120+
configurationManager := configuration.NewConfigurationManager(eventRecorder, dmsManager)
96121
maintenanceManager := maintenance.New(mgr.GetClient(), hostUtils, nodeName, namespace)
97122
firmwareManager := firmware.NewFirmwareManager(mgr.GetClient(), namespace)
98123

pkg/configuration/configvalidation.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ func (v *configValidationImpl) RuntimeConfigApplied(device *v1alpha1.NicDevice)
299299
log.Log.Error(err, "cannot validate QoS settings", "device", device.Name, "port", port.PCI)
300300
return false, err
301301
}
302-
actualTrust, actualPfc, err := v.utils.GetTrustAndPFC(port.NetworkInterface)
302+
actualTrust, actualPfc, err := v.utils.GetTrustAndPFC(device, port.NetworkInterface)
303303
if err != nil {
304304
log.Log.Error(err, "cannot validate QoS settings", "device", device.Name, "port", port.PCI)
305305
return false, err

pkg/configuration/configvalidation_test.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -820,8 +820,8 @@ var _ = Describe("ConfigValidationImpl", func() {
820820
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
821821
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
822822

823-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)
824-
mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, desiredPfc, nil)
823+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return(desiredTrust, desiredPfc, nil)
824+
mockHostUtils.On("GetTrustAndPFC", device, "interface1").Return(desiredTrust, desiredPfc, nil)
825825
})
826826

827827
It("should return true with no error", func() {
@@ -848,8 +848,8 @@ var _ = Describe("ConfigValidationImpl", func() {
848848

849849
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize+128, nil)
850850

851-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)
852-
mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, desiredPfc, nil)
851+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return(desiredTrust, desiredPfc, nil)
852+
mockHostUtils.On("GetTrustAndPFC", device, "interface1").Return(desiredTrust, desiredPfc, nil)
853853

854854
// GetMaxReadRequestSize is not mocked for the second port: it should not be queried since the first port already fails
855855
})
@@ -880,7 +880,8 @@ var _ = Describe("ConfigValidationImpl", func() {
880880
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
881881
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize+256, nil)
882882

883-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)
883+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return(desiredTrust, desiredPfc, nil)
884+
mockHostUtils.On("GetTrustAndPFC", device, "interface1").Return(desiredTrust, desiredPfc, nil)
884885
})
885886

886887
It("should return false with no error", func() {
@@ -897,7 +898,7 @@ var _ = Describe("ConfigValidationImpl", func() {
897898
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
898899
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
899900

900-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil)
901+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return("differentTrust", desiredPfc, nil)
901902
// GetTrustAndPFC is not mocked for the second port: it should not be queried since the first port already fails
902903
})
903904

@@ -910,14 +911,13 @@ var _ = Describe("ConfigValidationImpl", func() {
910911

911912
Context("when PFC setting does not match on the second port", func() {
912913
BeforeEach(func() {
913-
desiredMaxReadReqSize, desiredTrust, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)
914+
desiredMaxReadReqSize, desiredTrust, _ := validator.CalculateDesiredRuntimeConfig(device)
914915

915916
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
916917
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
917918

918-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)
919-
920-
mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, "differentPfc", nil)
919+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return(desiredTrust, "differentPfc", nil)
920+
mockHostUtils.On("GetTrustAndPFC", device, "interface1").Return(desiredTrust, "differentPfc", nil)
921921
})
922922

923923
It("should return false with no error", func() {
@@ -961,7 +961,7 @@ var _ = Describe("ConfigValidationImpl", func() {
961961
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
962962
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
963963

964-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return("", "", fmt.Errorf("failed to get trust and pfc"))
964+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return("", "", fmt.Errorf("failed to get trust and pfc"))
965965
})
966966

967967
It("should return false with the error", func() {
@@ -982,7 +982,7 @@ var _ = Describe("ConfigValidationImpl", func() {
982982
desiredMaxReadReqSize, desiredTrust, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)
983983

984984
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
985-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)
985+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return(desiredTrust, desiredPfc, nil)
986986
})
987987

988988
It("should return true with no error", func() {
@@ -1002,7 +1002,7 @@ var _ = Describe("ConfigValidationImpl", func() {
10021002
desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)
10031003

10041004
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
1005-
mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil)
1005+
mockHostUtils.On("GetTrustAndPFC", device, "interface0").Return("differentTrust", desiredPfc, nil)
10061006
})
10071007

10081008
It("should return false with no error", func() {

pkg/configuration/manager.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/Mellanox/nic-configuration-operator/api/v1alpha1"
2929
"github.com/Mellanox/nic-configuration-operator/pkg/consts"
30+
"github.com/Mellanox/nic-configuration-operator/pkg/dms"
3031
)
3132

3233
// ConfigurationManager contains logic for configuring NIC devices on the host
@@ -230,6 +231,7 @@ func (h configurationManager) ApplyDeviceRuntimeSpec(device *v1alpha1.NicDevice)
230231
alreadyApplied, err := h.configValidation.RuntimeConfigApplied(device)
231232
if err != nil {
232233
log.Log.Error(err, "failed to verify runtime configuration", "device", device)
234+
return err
233235
}
234236

235237
if alreadyApplied {
@@ -251,8 +253,9 @@ func (h configurationManager) ApplyDeviceRuntimeSpec(device *v1alpha1.NicDevice)
251253
}
252254
}
253255

254-
for _, port := range ports {
255-
err = h.configurationUtils.SetTrustAndPFC(port.NetworkInterface, desiredTrust, desiredPfc)
256+
// Don't apply QoS settings if neither trust nor pfc changes are requested
257+
if desiredTrust != "" || desiredPfc != "" {
258+
err = h.configurationUtils.SetTrustAndPFC(device, desiredTrust, desiredPfc)
256259
if err != nil {
257260
log.Log.Error(err, "failed to apply runtime configuration", "device", device)
258261
return err
@@ -277,7 +280,7 @@ func (h configurationManager) ResetNicFirmware(ctx context.Context, device *v1al
277280
return nil
278281
}
279282

280-
func NewConfigurationManager(eventRecorder record.EventRecorder) ConfigurationManager {
281-
utils := newConfigurationUtils()
283+
func NewConfigurationManager(eventRecorder record.EventRecorder, dmsManager dms.DMSManager) ConfigurationManager {
284+
utils := newConfigurationUtils(dmsManager)
282285
return configurationManager{configurationUtils: utils, configValidation: newConfigValidation(utils, eventRecorder)}
283286
}

0 commit comments

Comments
 (0)