Skip to content

Commit 8858e98

Browse files
author
fanhy36
committed
fix issue 75: segfault and failure to listen on volcano.sock
Signed-off-by: fanhy36 <[email protected]>

delete empty line

Signed-off-by: fanhy36 <[email protected]>
1 parent 09e3c9f commit 8858e98

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

pkg/plugin/nvidia/server.go

+12-2
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,17 @@ func (m *NvidiaDevicePlugin) DevicesNum() int {
188188
func (m *NvidiaDevicePlugin) Serve() error {
189189
sock, err := net.Listen("unix", m.socket)
190190
if err != nil {
191-
return err
191+
log.Printf("Listen sock fail and retry for '%s': %s", m.resourceName, err)
192+
err = os.Remove(m.socket)
193+
if err != nil {
194+
log.Printf("Error deleting file: %s, %v\n", m.socket, err)
195+
return err
196+
}
197+
sock, err = net.Listen("unix", m.socket)
198+
if err != nil {
199+
log.Printf("Retry Listen sock fail '%s': %s", m.resourceName, err)
200+
return err
201+
}
192202
}
193203

194204
pluginapi.RegisterDevicePluginServer(m.server, m)
@@ -343,6 +353,7 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.Alloc
343353
}
344354

345355
sort.Sort(availablePods)
356+
util.UseClient(m.kubeInteractor.clientset)
346357

347358
var candidatePod *v1.Pod
348359
for _, pod := range availablePods {
@@ -406,7 +417,6 @@ Allocate:
406417
return nil, fmt.Errorf("failed to update pod annotation %v", err)
407418
}
408419

409-
util.UseClient(m.kubeInteractor.clientset)
410420
klog.V(3).Infoln("Releasing lock: nodeName=", m.kubeInteractor.nodeName)
411421
err = util.ReleaseNodeLock(m.kubeInteractor.nodeName, "gpu")
412422
if err != nil {

0 commit comments

Comments
 (0)