@@ -28,6 +28,7 @@ import (
2828 "testing"
2929 "time"
3030
31+ "github.com/google/go-cmp/cmp"
3132 "github.com/stretchr/testify/assert"
3233 . "go.uber.org/mock/gomock"
3334 v1 "k8s.io/api/core/v1"
@@ -54,6 +55,73 @@ func nodeInfoEqual(l, r *NodeInfo) bool {
5455 return reflect .DeepEqual (l , r )
5556}
5657
58+ func nodeInfoEqualExplainable (l , r * NodeInfo ) error {
59+ var errors []error
60+
61+ if l .Name != r .Name {
62+ errors = append (errors , fmt .Errorf ("Name: exp %v, got %v" , l .Name , r .Name ))
63+ }
64+ if ! reflect .DeepEqual (l .Node , r .Node ) {
65+ errors = append (errors , fmt .Errorf ("Node: exp %v, got %v\n diff:\n %s" , l .Node , r .Node , cmp .Diff (l .Node , r .Node )))
66+ }
67+ if ! reflect .DeepEqual (l .Releasing , r .Releasing ) {
68+ errors = append (errors , fmt .Errorf ("Releasing: exp %v, got %v" , l .Releasing , r .Releasing ))
69+ }
70+ if ! reflect .DeepEqual (l .Idle , r .Idle ) {
71+ errors = append (errors , fmt .Errorf ("Idle: exp %v, got %v" , l .Idle , r .Idle ))
72+ }
73+ if ! reflect .DeepEqual (l .Used , r .Used ) {
74+ errors = append (errors , fmt .Errorf ("Used: exp %v, got %v" , l .Used , r .Used ))
75+ }
76+ if ! reflect .DeepEqual (l .Allocatable , r .Allocatable ) {
77+ lScalarResources := l .Allocatable .ScalarResources ()
78+ rScalarResources := r .Allocatable .ScalarResources ()
79+ if ! reflect .DeepEqual (lScalarResources , rScalarResources ) {
80+ errors = append (errors , fmt .Errorf ("Allocatable: exp %v, got %v\n diff:\n %s" , lScalarResources , rScalarResources , cmp .Diff (lScalarResources , rScalarResources )))
81+ }
82+ if ! reflect .DeepEqual (l .Allocatable .GPUs (), r .Allocatable .GPUs ()) {
83+ errors = append (errors , fmt .Errorf ("Allocatable GPUs: exp %v, got %v" , l .Allocatable .GPUs (), r .Allocatable .GPUs ()))
84+ }
85+ if ! reflect .DeepEqual (l .Allocatable .Cpu (), r .Allocatable .Cpu ()) {
86+ errors = append (errors , fmt .Errorf ("Allocatable Cpu: exp %v, got %v" , l .Allocatable .Cpu (), r .Allocatable .Cpu ()))
87+ }
88+ if ! reflect .DeepEqual (l .Allocatable .Memory (), r .Allocatable .Memory ()) {
89+ errors = append (errors , fmt .Errorf ("Allocatable Memory: exp %v, got %v" , l .Allocatable .Memory (), r .Allocatable .Memory ()))
90+ }
91+ }
92+ if ! reflect .DeepEqual (l .AccessibleStorageCapacities , r .AccessibleStorageCapacities ) {
93+ errors = append (errors , fmt .Errorf ("AccessibleStorageCapacities: exp %v, got %v" , l .AccessibleStorageCapacities , r .AccessibleStorageCapacities ))
94+ }
95+ if ! reflect .DeepEqual (l .PodInfos , r .PodInfos ) {
96+ errors = append (errors , fmt .Errorf ("PodInfos: exp %v, got %v" , l .PodInfos , r .PodInfos ))
97+ }
98+ if l .MaxTaskNum != r .MaxTaskNum {
99+ errors = append (errors , fmt .Errorf ("MaxTaskNum: exp %v, got %v" , l .MaxTaskNum , r .MaxTaskNum ))
100+ }
101+ if l .MemoryOfEveryGpuOnNode != r .MemoryOfEveryGpuOnNode {
102+ errors = append (errors , fmt .Errorf ("MemoryOfEveryGpuOnNode: exp %v, got %v" , l .MemoryOfEveryGpuOnNode , r .MemoryOfEveryGpuOnNode ))
103+ }
104+ if l .GpuMemorySynced != r .GpuMemorySynced {
105+ errors = append (errors , fmt .Errorf ("GpuMemorySynced: exp %v, got %v" , l .GpuMemorySynced , r .GpuMemorySynced ))
106+ }
107+ if ! reflect .DeepEqual (l .LegacyMIGTasks , r .LegacyMIGTasks ) {
108+ errors = append (errors , fmt .Errorf ("LegacyMIGTasks: exp %v, got %v" , l .LegacyMIGTasks , r .LegacyMIGTasks ))
109+ }
110+ if l .HasDRAGPUs != r .HasDRAGPUs {
111+ errors = append (errors , fmt .Errorf ("HasDRAGPUs: exp %v, got %v" , l .HasDRAGPUs , r .HasDRAGPUs ))
112+ }
113+ if ! reflect .DeepEqual (l .GpuSharingNodeInfo , r .GpuSharingNodeInfo ) {
114+ errors = append (errors , fmt .Errorf ("GpuSharingNodeInfo: exp %v, got %v" , l .GpuSharingNodeInfo , r .GpuSharingNodeInfo ))
115+ }
116+ if len (errors ) > 0 {
117+ return fmt .Errorf ("node info: \n %v" , errors )
118+ }
119+ if ! reflect .DeepEqual (l , r ) {
120+ errors = append (errors , fmt .Errorf ("unknown difference: exp %v, got %v" , l , r ))
121+ }
122+ return nil
123+ }
124+
57125type AddRemovePodsTest struct {
58126 name string
59127 node * v1.Node
@@ -217,7 +285,7 @@ func TestAddRemovePods(t *testing.T) {
217285 {
218286 name : "releasing pod" ,
219287 node : common_info .BuildNode ("n1" ,
220- common_info .BuildResourceListWithGPU ("8000m" , "10G" , "1" )),
288+ common_info .BuildResourceListWithGPUAndPods ("8000m" , "10G" , "1" , "110 " )),
221289 podsInfoMetadata : []podInfoMetadata {
222290 {
223291 pod : common_info .BuildPod ("c1" , "p1" , "n1" , v1 .PodRunning ,
@@ -232,7 +300,7 @@ func TestAddRemovePods(t *testing.T) {
232300 },
233301 addedPodsNodeInfo : & NodeInfo {
234302 Name : "n1" ,
235- Idle : common_info .BuildResourceWithGpu ("7000m" , "9G" , "0" , "110 " ),
303+ Idle : common_info .BuildResourceWithGpu ("7000m" , "9G" , "0" , "109 " ),
236304 Used : common_info .BuildResourceWithGpu ("1000m" , "1G" , "0" , "1" ),
237305 Releasing : common_info .BuildResourceWithGpu ("1000m" , "1G" , "1" , "1" ),
238306 Allocatable : common_info .BuildResourceWithGpu ("8000m" , "10G" , "1" , "110" ),
@@ -271,7 +339,7 @@ func TestAddRemovePods(t *testing.T) {
271339 {
272340 name : "pipelined pod - different gpus" ,
273341 node : common_info .BuildNode ("n1" ,
274- common_info .BuildResourceListWithGPU ("8000m" , "10G" , "1" )),
342+ common_info .BuildResourceListWithGPUAndPods ("8000m" , "10G" , "1" , "110 " )),
275343 podsInfoMetadata : []podInfoMetadata {
276344 {
277345 pod : common_info .BuildPod ("c1" , "p1" , "n1" , v1 .PodRunning ,
@@ -296,9 +364,9 @@ func TestAddRemovePods(t *testing.T) {
296364 },
297365 addedPodsNodeInfo : & NodeInfo {
298366 Name : "n1" ,
299- Idle : common_info .BuildResourceWithGpu ("7000m" , "9G" , "0" , "110 " ),
300- Used : common_info .BuildResourceWithGpu ("1500m" , "2G" , "0" , "1 " ),
301- Releasing : common_info .BuildResourceWithGpu ("500m" , "0G" , "0" , "1 " ),
367+ Idle : common_info .BuildResourceWithGpu ("7000m" , "9G" , "0" , "109 " ),
368+ Used : common_info .BuildResourceWithGpu ("1500m" , "2G" , "0" , "2 " ),
369+ Releasing : common_info .BuildResourceWithGpuNoPods ("500m" , "0G" , "0" ),
302370 Allocatable : common_info .BuildResourceWithGpu ("8000m" , "10G" , "1" , "110" ),
303371 PodInfos : map [common_info.PodID ]* pod_info.PodInfo {},
304372 LegacyMIGTasks : map [common_info.PodID ]string {},
@@ -340,7 +408,7 @@ func TestAddRemovePods(t *testing.T) {
340408 {
341409 name : "pipelined pod - same gpus" ,
342410 node : common_info .BuildNode ("n1" ,
343- common_info .BuildResourceListWithGPU ("8000m" , "10G" , "1" )),
411+ common_info .BuildResourceListWithGPUAndPods ("8000m" , "10G" , "1" , "110 " )),
344412 podsInfoMetadata : []podInfoMetadata {
345413 {
346414 pod : common_info .BuildPod ("c1" , "p1" , "n1" , v1 .PodRunning ,
@@ -375,9 +443,9 @@ func TestAddRemovePods(t *testing.T) {
375443 },
376444 addedPodsNodeInfo : & NodeInfo {
377445 Name : "n1" ,
378- Idle : common_info .BuildResourceWithGpu ("6000m" , "8G" , "0" , "110 " ),
379- Used : common_info .BuildResourceWithGpu ("2500m" , "3G" , "0" , "1 " ),
380- Releasing : common_info .BuildResourceWithGpu ("500m" , "0G" , "0" , "1 " ),
446+ Idle : common_info .BuildResourceWithGpu ("6000m" , "8G" , "0" , "108 " ),
447+ Used : common_info .BuildResourceWithGpu ("2500m" , "3G" , "0" , "3 " ),
448+ Releasing : common_info .BuildResourceWithGpuNoPods ("500m" , "0G" , "0" ),
381449 Allocatable : common_info .BuildResourceWithGpu ("8000m" , "10G" , "1" , "110" ),
382450 PodInfos : map [common_info.PodID ]* pod_info.PodInfo {},
383451 LegacyMIGTasks : map [common_info.PodID ]string {},
@@ -419,14 +487,8 @@ func TestAddRemovePods(t *testing.T) {
419487 test .addedPodsNodeInfo .Node = test .node
420488 test .removedPodsNodeInfo .Node = test .node
421489
422- numPods := len (test .podsInfoMetadata )
423490 test .addedPodsNodeInfo .MaxTaskNum = 110
424491 test .removedPodsNodeInfo .MaxTaskNum = 110
425- test .addedPodsNodeInfo .Allocatable .ScalarResources ()[resource_info .PodsResourceName ] = 110
426- test .addedPodsNodeInfo .Used .ScalarResources ()[resource_info .PodsResourceName ] = int64 (numPods )
427- test .addedPodsNodeInfo .Idle .ScalarResources ()[resource_info .PodsResourceName ] = int64 (110 - numPods )
428- test .removedPodsNodeInfo .Allocatable .ScalarResources ()[resource_info .PodsResourceName ] = 110
429- test .removedPodsNodeInfo .Idle .ScalarResources ()[resource_info .PodsResourceName ] = 110
430492
431493 controller := NewController (t )
432494 nodePodAffinityInfoAdded := pod_affinity .NewMockNodePodAffinityInfo (controller )
@@ -446,9 +508,8 @@ func TestAddRemovePods(t *testing.T) {
446508 podInfoKey := common_info .PodID (fmt .Sprintf ("%s/%s" , podInfo .Namespace , podInfo .Name ))
447509 test .addedPodsNodeInfo .PodInfos [podInfoKey ] = podInfo .Clone ()
448510 }
449- if ! nodeInfoEqual (ni , test .addedPodsNodeInfo ) {
450- t .Errorf ("pods added info: \n expected %v, \n got %v \n " ,
451- test .addedPodsNodeInfo , ni )
511+ if err := nodeInfoEqualExplainable (ni , test .addedPodsNodeInfo ); err != nil {
512+ t .Errorf ("Diff in node info after adding pods: \n %v" , err )
452513 }
453514
454515 nodePodAffinityInfoRemoved := pod_affinity .NewMockNodePodAffinityInfo (controller )
@@ -462,9 +523,8 @@ func TestAddRemovePods(t *testing.T) {
462523 for _ , podInfo := range podsInfo {
463524 _ = ni .RemoveTask (podInfo )
464525 }
465- if ! nodeInfoEqual (ni , test .removedPodsNodeInfo ) {
466- t .Errorf ("pods removed info: \n expected %v, \n got %v \n " ,
467- test .removedPodsNodeInfo , ni )
526+ if err := nodeInfoEqualExplainable (ni , test .removedPodsNodeInfo ); err != nil {
527+ t .Errorf ("Diff in node info after removing pods: \n %v" , err )
468528 }
469529 })
470530 }
@@ -538,14 +598,14 @@ func TestIsTaskAllocatable(t *testing.T) {
538598 expectedMessageContains : []string {"GPU" },
539599 },
540600 "already used gpu so missing gpu" : {
541- node : common_info .BuildNode ("n1" , common_info .BuildResourceListWithGPU ("2000m" , "2G" , "1" )),
601+ node : common_info .BuildNode ("n1" , common_info .BuildResourceListWithGPUAndPods ("2000m" , "2G" , "1" , "110 " )),
542602 podsResources : []v1.ResourceList {common_info .BuildResourceListWithGPU ("1000m" , "1G" , "1" )},
543603 podResourcesToAllocate : common_info .BuildResourceListWithGPU ("1000m" , "1G" , "1" ),
544604 expected : false ,
545605 expectedMessageContains : []string {"GPU" },
546606 },
547607 "enough cpu memory and gpu" : {
548- node : common_info .BuildNode ("n1" , common_info .BuildResourceListWithGPU ("2000m" , "2G" , "2" )),
608+ node : common_info .BuildNode ("n1" , common_info .BuildResourceListWithGPUAndPods ("2000m" , "2G" , "2" , "110 " )),
549609 podsResources : []v1.ResourceList {common_info .BuildResourceListWithGPU ("1000m" , "1G" , "1" )},
550610 podResourcesToAllocate : common_info .BuildResourceListWithGPU ("1000m" , "1G" , "1" ),
551611 expected : true ,
@@ -602,10 +662,10 @@ func TestIsTaskAllocatable(t *testing.T) {
602662}
603663
604664func TestIsTaskAllocatableOnReleasingOrIdle (t * testing.T ) {
605- singleMigNode := common_info .BuildNode ("single-mig" , common_info .BuildResourceListWithGPU ("2000m" , "2G" , "8" ))
665+ singleMigNode := common_info .BuildNode ("single-mig" , common_info .BuildResourceListWithGPUAndPods ("2000m" , "2G" , "8" , "110 " ))
606666 singleMigNode .Labels [migEnabledLabelKey ] = "true"
607667
608- mixedMigNode := common_info .BuildNode ("mixed-mig" , common_info .BuildResourceListWithGPU ("2000m" , "2G" , "8" ))
668+ mixedMigNode := common_info .BuildNode ("mixed-mig" , common_info .BuildResourceListWithGPUAndPods ("2000m" , "2G" , "8" , "110 " ))
609669 mixedMigNode .Labels [migEnabledLabelKey ] = "true"
610670 mixedMigNode .Labels [commonconstants .GpuCountLabel ] = "8"
611671 mixedMigNode .Labels [GpuMemoryLabel ] = "40"
0 commit comments