Skip to content

Commit f77c610

Browse files
craigcondit and wilfred-s
authored and committed
[YUNIKORN-1190] Account for usage of pods without applicationId (#413)
In plugin mode, pods could be scheduled by the default scheduler and not accounted for in the node usage. This happens for pods without an application ID but with the scheduler set to YuniKorn. Closes: #413. Signed-off-by: Wilfred Spiegelenburg <wilfreds@apache.org>
1 parent d728f56 commit f77c610

5 files changed

Lines changed: 79 additions & 8 deletions

File tree

pkg/cache/context.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,11 @@ func (ctx *Context) updatePodInCache(oldObj, newObj interface{}) {
267267
func (ctx *Context) filterPods(obj interface{}) bool {
268268
switch obj := obj.(type) {
269269
case *v1.Pod:
270-
return utils.GeneralPodFilter(obj)
270+
if utils.GeneralPodFilter(obj) {
271+
_, err := utils.GetApplicationIDFromPod(obj)
272+
return err == nil
273+
}
274+
return false
271275
default:
272276
return false
273277
}

pkg/cache/context_recovery.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,10 @@ func (ctx *Context) recover(mgr []interfaces.Recoverable, due time.Duration) err
104104
continue
105105
}
106106
// yunikorn scheduled pods add to existing allocations
107-
if utils.GeneralPodFilter(&pod) {
107+
_, err = utils.GetApplicationIDFromPod(&pod)
108+
ykPod := utils.GeneralPodFilter(&pod) && err == nil
109+
switch {
110+
case ykPod:
108111
if existingAlloc := getExistingAllocation(mgr, &pod); existingAlloc != nil {
109112
log.Logger().Debug("Adding resources for existing pod",
110113
zap.String("appID", existingAlloc.ApplicationID),
@@ -123,7 +126,7 @@ func (ctx *Context) recover(mgr []interfaces.Recoverable, due time.Duration) err
123126
zap.String("nodeName", pod.Spec.NodeName),
124127
zap.Stringer("resources", common.GetPodResource(&pod)))
125128
}
126-
} else if !utils.IsPodTerminated(&pod) {
129+
case !utils.IsPodTerminated(&pod):
127130
// pod is not terminated (succeed or failed) state,
128131
// and it has a node assigned, that means the scheduler
129132
// has already allocated the pod onto a node
@@ -141,7 +144,7 @@ func (ctx *Context) recover(mgr []interfaces.Recoverable, due time.Duration) err
141144
occupiedResource = common.Add(occupiedResource, podResource)
142145
nodeOccupiedResources[pod.Spec.NodeName] = occupiedResource
143146
ctx.nodes.cache.AddPod(&pod)
144-
} else {
147+
default:
145148
log.Logger().Debug("Skipping terminated pod",
146149
zap.String("podUID", string(pod.UID)),
147150
zap.String("podName", fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)))

pkg/cache/context_test.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,22 @@ func TestFilterPods(t *testing.T) {
263263
},
264264
Spec: v1.PodSpec{SchedulerName: "default-scheduler"},
265265
}
266+
pod3 := &v1.Pod{
267+
TypeMeta: apis.TypeMeta{
268+
Kind: "Pod",
269+
APIVersion: "v1",
270+
},
271+
ObjectMeta: apis.ObjectMeta{
272+
Name: "yunikorn-test-00003",
273+
UID: "UID-00003",
274+
Labels: map[string]string{"applicationId": "test-00003"},
275+
},
276+
Spec: v1.PodSpec{SchedulerName: "yunikorn"},
277+
}
266278
assert.Check(t, !context.filterPods(nil), "nil object was allowed")
267-
assert.Check(t, context.filterPods(pod1), "yunikorn-managed pod was filtered")
279+
assert.Check(t, !context.filterPods(pod1), "yunikorn-managed pod with no app id was allowed")
268280
assert.Check(t, !context.filterPods(pod2), "non-yunikorn-managed pod was allowed")
281+
assert.Check(t, context.filterPods(pod3), "yunikorn-managed pod was filtered")
269282
}
270283

271284
func TestAddPodToCache(t *testing.T) {

pkg/cache/node_coordinator.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,13 @@ func newNodeResourceCoordinator(nodes *schedulerNodes) *nodeResourceCoordinator
4848

4949
// filter pods that not scheduled by us
5050
func (c *nodeResourceCoordinator) filterPods(obj interface{}) bool {
51-
switch obj.(type) {
51+
switch obj := obj.(type) {
5252
case *v1.Pod:
53-
pod := obj.(*v1.Pod)
54-
return !utils.GeneralPodFilter(pod)
53+
if utils.GeneralPodFilter(obj) {
54+
_, err := utils.GetApplicationIDFromPod(obj)
55+
return err != nil
56+
}
57+
return true
5558
default:
5659
return false
5760
}

pkg/cache/node_coordinator_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
"gotest.tools/assert"
2525
v1 "k8s.io/api/core/v1"
26+
apis "k8s.io/apimachinery/pkg/apis/meta/v1"
2627

2728
"github.com/apache/yunikorn-k8shim/pkg/common/constants"
2829
"github.com/apache/yunikorn-k8shim/pkg/common/utils"
@@ -295,3 +296,50 @@ func TestDeleteTerminatedPod(t *testing.T) {
295296
coordinator.deletePod(pod2)
296297
assert.Equal(t, executed, false)
297298
}
299+
300+
func TestNodeCoordinatorFilterPods(t *testing.T) {
301+
mockedSchedulerAPI := newMockSchedulerAPI()
302+
nodes := newSchedulerNodes(mockedSchedulerAPI, NewTestSchedulerCache())
303+
host1 := utils.NodeForTest(Host1, "10G", "10")
304+
nodes.addNode(host1)
305+
coordinator := newNodeResourceCoordinator(nodes)
306+
307+
pod1 := &v1.Pod{
308+
TypeMeta: apis.TypeMeta{
309+
Kind: "Pod",
310+
APIVersion: "v1",
311+
},
312+
ObjectMeta: apis.ObjectMeta{
313+
Name: "yunikorn-test-00001",
314+
UID: "UID-00001",
315+
},
316+
Spec: v1.PodSpec{SchedulerName: "yunikorn"},
317+
}
318+
pod2 := &v1.Pod{
319+
TypeMeta: apis.TypeMeta{
320+
Kind: "Pod",
321+
APIVersion: "v1",
322+
},
323+
ObjectMeta: apis.ObjectMeta{
324+
Name: "yunikorn-test-00002",
325+
UID: "UID-00002",
326+
},
327+
Spec: v1.PodSpec{SchedulerName: "default-scheduler"},
328+
}
329+
pod3 := &v1.Pod{
330+
TypeMeta: apis.TypeMeta{
331+
Kind: "Pod",
332+
APIVersion: "v1",
333+
},
334+
ObjectMeta: apis.ObjectMeta{
335+
Name: "yunikorn-test-00003",
336+
UID: "UID-00003",
337+
Labels: map[string]string{"applicationId": "test-00003"},
338+
},
339+
Spec: v1.PodSpec{SchedulerName: "yunikorn"},
340+
}
341+
assert.Check(t, !coordinator.filterPods(nil), "nil object was allowed")
342+
assert.Check(t, coordinator.filterPods(pod1), "yunikorn-managed pod with no app id was filtered")
343+
assert.Check(t, coordinator.filterPods(pod2), "non-yunikorn-managed pod was filtered")
344+
assert.Check(t, !coordinator.filterPods(pod3), "yunikorn-managed pod was allowed")
345+
}

0 commit comments

Comments
 (0)