diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/base.go b/pkg/agent/qrm-plugins/gpu/baseplugin/base.go index d706545c09..d0e30ca99e 100644 --- a/pkg/agent/qrm-plugins/gpu/baseplugin/base.go +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/base.go @@ -98,6 +98,7 @@ func NewBasePlugin( // Run starts the asynchronous tasks of the plugin func (p *BasePlugin) Run(stopCh <-chan struct{}) { + go p.DeviceTopologyRegistry.Run(stopCh) go func() { select { case <-p.stateInitializedCh: @@ -108,7 +109,6 @@ func (p *BasePlugin) Run(stopCh <-chan struct{}) { return } }() - go p.DeviceTopologyRegistry.Run(stopCh) } // GetState may return a nil state because the state is only initialized when InitState is called. diff --git a/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go b/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go index 8b136735b2..de57b7383b 100644 --- a/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go +++ b/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go @@ -149,6 +149,8 @@ func (p *StaticPolicy) Start() (err error) { periodicalhandler.ReadyToStartHandlersByGroup(appqrm.QRMGPUPluginPeriodicalHandlerGroupName) }, 5*time.Second, p.stopCh) + p.BasePlugin.Run(p.stopCh) + return nil } diff --git a/pkg/util/machine/device.go b/pkg/util/machine/device.go index 758509ac1b..010831eafe 100644 --- a/pkg/util/machine/device.go +++ b/pkg/util/machine/device.go @@ -224,7 +224,12 @@ func (r *DeviceTopologyRegistry) GetLatestDeviceTopology(deviceNames []string) ( return nil, err } - return PickLatestDeviceTopology(topologiesMap), nil + latestTopology := PickLatestDeviceTopology(topologiesMap) + if latestTopology == nil { + return nil, fmt.Errorf("no latest device topology") + } + + return latestTopology, nil } // GetDeviceNUMAAffinity retrieves a map of a certain device A to the list of devices in device B that it has an affinity with.