Skip to content

Commit 44008ac

Browse files
committed
fix free container logic
1 parent 688f345 commit 44008ac

File tree

2 files changed

+33
-30
lines changed

2 files changed

+33
-30
lines changed

pkg/device_plugin/api.go

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,24 @@ func (sas *SandboxAPIServer) resolveDevicePath(devId string) (string, error) {
160160
// and returns the list of physical devices that can be given to the pod
161161
func (sas *SandboxAPIServer) AllocatePodDevices(ctx context.Context, pr *cdiresolver.PodRequest) (*cdiresolver.PhysicalDeviceResponse, error) {
162162
log.Printf("[Info] Pod Allocate Request: %v", pr)
163+
164+
response := &cdiresolver.PhysicalDeviceResponse{}
165+
166+
// check if this pod already has some devices assigned
167+
podDevices, exists := sas.podDeviceIDMap[pr.PodID]
168+
if exists && len(podDevices) == int(pr.Count) {
169+
response.PhysicalDeviceID = podDevices
170+
log.Printf("[Info] Pod (already exists) Allocate Response: %v", response)
171+
return response, nil
172+
}
173+
163174
deviceType := pr.DeviceType
164175
deviceList := sas.deviceIDs[deviceType]
165176
if len(deviceList) < int(pr.Count) {
166-
return nil, fmt.Errorf("Not enough '%q' devices available for request '%v'", deviceType, pr)
177+
err := fmt.Errorf("Not enough '%q' devices available for request '%v'", deviceType, pr)
178+
log.Print(err)
179+
return nil, err
167180
}
168-
response := &cdiresolver.PhysicalDeviceResponse{}
169181
physicalDevices := []string{}
170182
// podRequest has three fields : PodID, Count, DeviceType
171183
// get 'Count' number of GPUs from deviceIDs map
@@ -177,7 +189,7 @@ func (sas *SandboxAPIServer) AllocatePodDevices(ctx context.Context, pr *cdireso
177189
sas.deviceIDs[deviceType] = deviceList
178190

179191
// store the association in pod mapping
180-
podDevices := sas.podDeviceIDMap[pr.PodID]
192+
podDevices = sas.podDeviceIDMap[pr.PodID]
181193
podDevices = append(podDevices, dev)
182194
sas.podDeviceIDMap[pr.PodID] = podDevices
183195

@@ -209,9 +221,9 @@ func (sas *SandboxAPIServer) AllocateContainerDevices(ctx context.Context, cr *c
209221

210222
// now we get the physical devices associated with PodID
211223
physicalDevices := sas.podDeviceIDMap[cr.PodID]
212-
for _, cid := range cr.VirtualDeviceID {
213-
if _, ok := sas.virtualDeviceIDMap[cid]; ok {
214-
err := fmt.Errorf("Virtual Device ID %s is already taken", cid)
224+
for _, vid := range cr.VirtualDeviceID {
225+
if _, ok := sas.virtualDeviceIDMap[vid]; ok {
226+
err := fmt.Errorf("Virtual Device ID %s is already taken", vid)
215227
log.Print(err)
216228
return nil, err
217229
}
@@ -230,18 +242,18 @@ func (sas *SandboxAPIServer) AllocateContainerDevices(ctx context.Context, cr *c
230242
log.Printf("Bad device %v in ContainerAllocate: %v", physDev, err)
231243
continue
232244
}
233-
// assign it to cid
245+
// assign it to vid
234246
containerDevices := sas.containerDeviceIDMap[cr.ContainerID]
235-
containerDevices = append(containerDevices, cid)
247+
containerDevices = append(containerDevices, vid)
236248
sas.containerDeviceIDMap[cr.ContainerID] = containerDevices
237-
sas.virtualDeviceIDMap[cid] = physDev
238-
sas.deviceVirtualIDMap[physDev] = cid
249+
sas.virtualDeviceIDMap[vid] = physDev
250+
sas.deviceVirtualIDMap[physDev] = vid
239251
response.PhysicalDeviceID = append(response.PhysicalDeviceID, devicePath)
240252
assigned = true
241253
break
242254
}
243255
if !assigned {
244-
err := fmt.Errorf("Could not find a suitable physical device for %q. Request: %v", cid, cr)
256+
err := fmt.Errorf("Could not find a suitable physical device for %q. Request: %v", vid, cr)
245257
log.Print(err)
246258
return response, err
247259
}
@@ -254,24 +266,15 @@ func (sas *SandboxAPIServer) FreeContainerDevices(ctx context.Context, cr *cdire
254266
log.Printf("[Info] Free Container Request: %v", cr)
255267
response := &cdiresolver.PhysicalDeviceResponse{}
256268
// dis-associate container's virtual ids with the physical devices
257-
removeMap := make(map[string]bool)
258-
for _, cid := range cr.VirtualDeviceID {
259-
removeMap[cid] = true
260-
}
261269
vids := sas.containerDeviceIDMap[cr.ContainerID]
262-
remainingVirtualDevices := []string{}
263270
for _, vid := range vids {
264-
if !removeMap[vid] {
265-
remainingVirtualDevices = append(remainingVirtualDevices, vid)
266-
} else {
267-
// remove the virtual to physical mapping
268-
physDev := sas.virtualDeviceIDMap[vid]
269-
delete(sas.virtualDeviceIDMap, vid)
270-
delete(sas.deviceVirtualIDMap, physDev)
271-
response.PhysicalDeviceID = append(response.PhysicalDeviceID, physDev)
272-
}
271+
// remove the virtual to physical mapping
272+
physDev := sas.virtualDeviceIDMap[vid]
273+
delete(sas.virtualDeviceIDMap, vid)
274+
delete(sas.deviceVirtualIDMap, physDev)
275+
response.PhysicalDeviceID = append(response.PhysicalDeviceID, physDev)
273276
}
274-
sas.containerDeviceIDMap[cr.ContainerID] = remainingVirtualDevices
277+
delete(sas.containerDeviceIDMap, cr.ContainerID)
275278

276279
// remove container from pod's container map
277280
containers := sas.podContainerIDMap[cr.PodID]

pkg/device_plugin/generic_device_plugin.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ func (dpi *GenericDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.Dev
252252

253253
// Performs pre allocation checks and allocates a virtual cold plug device based on the request
254254
func (dpi *GenericDevicePlugin) ColdPlugAllocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
255-
log.Println("In cold plug allocate")
255+
log.Printf("[INFO] Cold Plug Allocate request: %v", reqs)
256256
responses := pluginapi.AllocateResponse{}
257257
envList := map[string][]string{}
258258
for _, req := range reqs.ContainerRequests {
@@ -262,8 +262,8 @@ func (dpi *GenericDevicePlugin) ColdPlugAllocate(ctx context.Context, reqs *plug
262262
// cannot prepare the devices at this point, can't even check if they belong to a valid iommu/vendor
263263
devAddrs := []string{devId}
264264
deviceSpecs = append(deviceSpecs, &pluginapi.DeviceSpec{
265-
HostPath: cdiresolverPath,
266-
ContainerPath: cdiresolverPath,
265+
HostPath: filepath.Join(cdiresolverPath, devId),
266+
ContainerPath: filepath.Join(cdiresolverPath, devId),
267267
Permissions: "mrw",
268268
})
269269

@@ -274,7 +274,6 @@ func (dpi *GenericDevicePlugin) ColdPlugAllocate(ctx context.Context, reqs *plug
274274
envList[key] = append(envList[key], devAddrs...)
275275
}
276276
envs := buildEnv(envList)
277-
log.Printf("Cold plug Allocated devices %s", envs)
278277
response := pluginapi.ContainerAllocateResponse{
279278
Envs: envs,
280279
Devices: deviceSpecs,
@@ -283,6 +282,7 @@ func (dpi *GenericDevicePlugin) ColdPlugAllocate(ctx context.Context, reqs *plug
283282
responses.ContainerResponses = append(responses.ContainerResponses, &response)
284283
}
285284

285+
log.Printf("[INFO] Cold Plug Allocate response: %v", responses)
286286
return &responses, nil
287287
}
288288

0 commit comments

Comments
 (0)