Skip to content

Commit 7bf0045

Browse files
committed
scheduler: account for affinity value of zero in score normalization
If there are no affinities on a job, we don't want to count an affinity score of zero in the number of scores we divide the normalized score by. This is how we handle other scoring components like node reschedule penalties on nodes that weren't running the previous allocation. But we also exclude counting the affinity in the case where we have affinity but the value is zero. In pathological cases, this can result in a node with a low affinity being picked over a node with no affinity, because the denominator is 1 larger. Include zero-value affinities in the count of scores if the job has affinities but the value just happens to be zero. Fixes: #25621
1 parent 21fd0bb commit 7bf0045

File tree

2 files changed

+90
-35
lines changed

2 files changed

+90
-35
lines changed

scheduler/rank.go

+3-4
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,7 @@ func (iter *NodeAffinityIterator) Next() *RankedNode {
956956
return nil
957957
}
958958
if !iter.hasAffinities() {
959+
fmt.Println("NO AFFINITIES!")
959960
iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", 0)
960961
return option
961962
}
@@ -972,10 +973,8 @@ func (iter *NodeAffinityIterator) Next() *RankedNode {
972973
}
973974
}
974975
normScore := totalAffinityScore / sumWeight
975-
if totalAffinityScore != 0.0 {
976-
option.Scores = append(option.Scores, normScore)
977-
iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", normScore)
978-
}
976+
option.Scores = append(option.Scores, normScore)
977+
iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", normScore)
979978
return option
980979
}
981980

scheduler/rank_test.go

+87-31
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ import (
77
"sort"
88
"testing"
99

10+
"github.com/hashicorp/nomad/ci"
1011
"github.com/hashicorp/nomad/client/lib/idset"
1112
"github.com/hashicorp/nomad/client/lib/numalib"
1213
"github.com/hashicorp/nomad/client/lib/numalib/hw"
1314
"github.com/hashicorp/nomad/helper/uuid"
1415
"github.com/hashicorp/nomad/nomad/mock"
1516
"github.com/hashicorp/nomad/nomad/structs"
17+
"github.com/shoenig/test"
1618
"github.com/shoenig/test/must"
1719
"github.com/stretchr/testify/require"
1820
)
@@ -2261,18 +2263,29 @@ func TestScoreNormalizationIterator(t *testing.T) {
22612263
}
22622264

22632265
func TestNodeAffinityIterator(t *testing.T) {
2266+
ci.Parallel(t)
22642267
_, ctx := testContext(t)
2265-
nodes := []*RankedNode{
2266-
{Node: mock.Node()},
2267-
{Node: mock.Node()},
2268-
{Node: mock.Node()},
2269-
{Node: mock.Node()},
2270-
}
22712268

2272-
nodes[0].Node.Attributes["kernel.version"] = "4.9"
2273-
nodes[1].Node.Datacenter = "dc2"
2274-
nodes[2].Node.Datacenter = "dc2"
2275-
nodes[2].Node.NodeClass = "large"
2269+
testNodes := func() []*RankedNode {
2270+
nodes := []*RankedNode{
2271+
{Node: mock.Node()},
2272+
{Node: mock.Node()},
2273+
{Node: mock.Node()},
2274+
{Node: mock.Node()},
2275+
{Node: mock.Node()},
2276+
}
2277+
2278+
nodes[0].Node.Attributes["kernel.version"] = "4.9"
2279+
nodes[1].Node.Datacenter = "dc2"
2280+
nodes[2].Node.Datacenter = "dc2"
2281+
nodes[2].Node.NodeClass = "large"
2282+
2283+
// this node should have zero affinity
2284+
nodes[4].Node.Attributes["kernel.version"] = "2.6"
2285+
nodes[4].Node.Datacenter = "dc3"
2286+
nodes[4].Node.NodeClass = "weird"
2287+
return nodes
2288+
}
22762289

22772290
affinities := []*structs.Affinity{
22782291
{
@@ -2300,37 +2313,80 @@ func TestNodeAffinityIterator(t *testing.T) {
23002313
Weight: 50,
23012314
},
23022315
}
2303-
2304-
static := NewStaticRankIterator(ctx, nodes)
2305-
23062316
job := mock.Job()
23072317
job.ID = "foo"
23082318
tg := job.TaskGroups[0]
23092319
tg.Affinities = affinities
23102320

2311-
nodeAffinity := NewNodeAffinityIterator(ctx, static)
2312-
nodeAffinity.SetTaskGroup(tg)
2321+
t.Run("affinity alone", func(t *testing.T) {
2322+
static := NewStaticRankIterator(ctx, testNodes())
23132323

2314-
scoreNorm := NewScoreNormalizationIterator(ctx, nodeAffinity)
2324+
nodeAffinity := NewNodeAffinityIterator(ctx, static)
2325+
nodeAffinity.SetTaskGroup(tg)
23152326

2316-
out := collectRanked(scoreNorm)
2317-
expectedScores := make(map[string]float64)
2318-
// Total weight = 300
2319-
// Node 0 matches two affinities(dc and kernel version), total weight = 150
2320-
expectedScores[nodes[0].Node.ID] = 0.5
2327+
scoreNorm := NewScoreNormalizationIterator(ctx, nodeAffinity)
2328+
out := collectRanked(scoreNorm)
23212329

2322-
// Node 1 matches an anti affinity, weight = -100
2323-
expectedScores[nodes[1].Node.ID] = -(1.0 / 3.0)
2330+
// Total weight = 300
2331+
// Node 0 matches two affinities(dc and kernel version), total weight = 150
2332+
test.Eq(t, 0.5, out[0].FinalScore)
23242333

2325-
// Node 2 matches one affinity(node class) with weight 50
2326-
expectedScores[nodes[2].Node.ID] = -(1.0 / 6.0)
2334+
// Node 1 matches an anti affinity, weight = -100
2335+
test.Eq(t, -(1.0 / 3.0), out[1].FinalScore)
23272336

2328-
// Node 3 matches one affinity (dc) with weight = 100
2329-
expectedScores[nodes[3].Node.ID] = 1.0 / 3.0
2337+
// Node 2 matches one affinity(node class) with weight 50
2338+
test.Eq(t, -(1.0 / 6.0), out[2].FinalScore)
23302339

2331-
require := require.New(t)
2332-
for _, n := range out {
2333-
require.Equal(expectedScores[n.Node.ID], n.FinalScore)
2334-
}
2340+
// Node 3 matches one affinity (dc) with weight = 100
2341+
test.Eq(t, 1.0/3.0, out[3].FinalScore)
2342+
2343+
// Node 4 matches no affinities but should still generate a score
2344+
test.Eq(t, 0, out[4].FinalScore)
2345+
test.Len(t, 1, out[4].Scores)
2346+
})
2347+
2348+
t.Run("affinity with binpack", func(t *testing.T) {
2349+
nodes := testNodes()
2350+
static := NewStaticRankIterator(ctx, nodes)
2351+
2352+
// include a binpack iterator so we can see the impact on including zero
2353+
// values in final scores. The nodes are all empty so score contribution
2354+
// from binpacking will be identical
2355+
2356+
binp := NewBinPackIterator(ctx, static, false, 0)
2357+
binp.SetTaskGroup(tg)
2358+
fit := structs.ScoreFitBinPack(nodes[0].Node, &structs.ComparableResources{
2359+
Flattened: structs.AllocatedTaskResources{
2360+
Cpu: structs.AllocatedCpuResources{CpuShares: 500},
2361+
Memory: structs.AllocatedMemoryResources{MemoryMB: 256},
2362+
},
2363+
})
2364+
bp := fit / binPackingMaxFitScore
2365+
2366+
nodeAffinity := NewNodeAffinityIterator(ctx, binp)
2367+
nodeAffinity.SetTaskGroup(tg)
2368+
2369+
scoreNorm := NewScoreNormalizationIterator(ctx, nodeAffinity)
2370+
out := collectRanked(scoreNorm)
2371+
2372+
// Total weight = 300
2373+
// Node 0 matches two affinities(dc and kernel version), total weight = 150
2374+
test.Eq(t, (0.5+bp)/2, out[0].FinalScore)
2375+
2376+
// Node 1 matches an anti affinity, weight = -100
2377+
test.Eq(t, (-(1.0/3.0)+bp)/2, out[1].FinalScore)
2378+
2379+
// Node 2 matches one affinity(node class) with weight 50
2380+
test.Eq(t, (-(1.0/6.0)+bp)/2, out[2].FinalScore)
2381+
2382+
// Node 3 matches one affinity (dc) with weight = 100
2383+
test.Eq(t, ((1.0/3.0)+bp)/2, out[3].FinalScore)
23352384

2385+
// Node 4 matches no affinities but should still generate a score and
2386+
// the final score should be lower than any positive score
2387+
test.Eq(t, (0+bp)/2, out[4].FinalScore)
2388+
test.Len(t, 2, out[4].Scores)
2389+
test.Less(t, out[0].FinalScore, out[4].FinalScore)
2390+
test.Less(t, out[3].FinalScore, out[4].FinalScore)
2391+
})
23362392
}

0 commit comments

Comments
 (0)