@@ -58,6 +58,7 @@ const (
5858 sandboxDevicePluginAssetsPath = "assets/state-sandbox-device-plugin"
5959 devicePluginAssetsPath = "assets/state-device-plugin/"
6060 dcgmExporterAssetsPath = "assets/state-dcgm-exporter/"
61+ migManagerAssetsPath = "assets/state-mig-manager/"
6162 nfdNvidiaPCILabelKey = "feature.node.kubernetes.io/pci-10de.present"
6263 upgradedKernel = "5.4.135-generic"
6364)
@@ -423,6 +424,24 @@ func testDaemonsetCommon(t *testing.T, cp *gpuv1.ClusterPolicy, component string
423424 if err != nil {
424425 return nil , fmt .Errorf ("unable to get mainCtrImage for dcgm-exporter: %v" , err )
425426 }
427+ case "MIGManager" :
428+ spec = commonDaemonsetSpec {
429+ repository : cp .Spec .MIGManager .Repository ,
430+ image : cp .Spec .MIGManager .Image ,
431+ version : cp .Spec .MIGManager .Version ,
432+ imagePullPolicy : cp .Spec .MIGManager .ImagePullPolicy ,
433+ imagePullSecrets : getImagePullSecrets (cp .Spec .MIGManager .ImagePullSecrets ),
434+ args : cp .Spec .MIGManager .Args ,
435+ env : cp .Spec .MIGManager .Env ,
436+ resources : cp .Spec .MIGManager .Resources ,
437+ }
438+ dsLabel = "nvidia-mig-manager"
439+ mainCtrName = "nvidia-mig-manager"
440+ manifestFile = filepath .Join (cfg .root , migManagerAssetsPath )
441+ mainCtrImage , err = gpuv1 .ImagePath (& cp .Spec .MIGManager )
442+ if err != nil {
443+ return nil , fmt .Errorf ("unable to get mainCtrImage for mig-manager: %v" , err )
444+ }
426445 default :
427446 return nil , fmt .Errorf ("invalid component for testDaemonsetCommon(): %s" , component )
428447 }
@@ -1479,3 +1498,136 @@ func TestCertConfigPathMap(t *testing.T) {
14791498 require .Equal (t , expectedPath , path , "Incorrect path for OS %s" , os )
14801499 }
14811500}
1501+
1502+ // getMIGManagerTestInput returns a ClusterPolicy instance for a particular
1503+ // MIG Manager test case. This function will grow as new test cases are added
1504+ func getMIGManagerTestInput (testCase string ) * gpuv1.ClusterPolicy {
1505+ cp := clusterPolicy .DeepCopy ()
1506+
1507+ // Set default values for MIG Manager
1508+ cp .Spec .MIGManager .Repository = "nvcr.io/nvidia/cloud-native"
1509+ cp .Spec .MIGManager .Image = "k8s-mig-manager"
1510+ cp .Spec .MIGManager .Version = "v0.5.0"
1511+ cp .Spec .MIGManager .ImagePullSecrets = []string {"ngc-secret" }
1512+
1513+ // Validator is required for all daemonset tests
1514+ cp .Spec .Validator .Repository = "nvcr.io/nvidia/cloud-native"
1515+ cp .Spec .Validator .Image = "gpu-operator-validator"
1516+ cp .Spec .Validator .Version = "v1.11.0"
1517+ cp .Spec .Validator .ImagePullSecrets = []string {"ngc-secret" }
1518+
1519+ switch testCase {
1520+ case "default" :
1521+ // No custom config
1522+ case "custom-config" :
1523+ cp .Spec .MIGManager .Config = & gpuv1.MIGPartedConfigSpec {Name : "custom-mig-config" }
1524+ default :
1525+ return nil
1526+ }
1527+
1528+ return cp
1529+ }
1530+
// getMIGManagerTestOutput returns the expected results for the named MIG
// Manager test case, keyed by "numDaemonsets", "migManagerImage",
// "imagePullSecret", "migConfigVolumePresent" and "env". An unrecognized
// testCase yields nil. Extend the switch below as new test cases are added.
func getMIGManagerTestOutput(testCase string) map[string]interface{} {
	// Values that differ between test cases; the rest are constant.
	configVolumeExpected := false
	expectedEnv := map[string]string{}

	switch testCase {
	case "default":
		// No config volume and no extra env vars.
	case "custom-config":
		configVolumeExpected = true
		expectedEnv = map[string]string{
			"CONFIG_FILE": "/mig-parted-config/config.yaml",
		}
	default:
		return nil
	}

	return map[string]interface{}{
		"numDaemonsets":          1,
		"migManagerImage":        "nvcr.io/nvidia/cloud-native/k8s-mig-manager:v0.5.0",
		"imagePullSecret":        "ngc-secret",
		"migConfigVolumePresent": configVolumeExpected,
		"env":                    expectedEnv,
	}
}
1557+
1558+ // TestMIGManager tests that the GPU Operator correctly deploys the mig-manager daemonset
1559+ // under various scenarios/config options
1560+ func TestMIGManager (t * testing.T ) {
1561+ testCases := []struct {
1562+ description string
1563+ clusterPolicy * gpuv1.ClusterPolicy
1564+ output map [string ]interface {}
1565+ }{
1566+ {
1567+ "Default" ,
1568+ getMIGManagerTestInput ("default" ),
1569+ getMIGManagerTestOutput ("default" ),
1570+ },
1571+ {
1572+ "CustomConfig" ,
1573+ getMIGManagerTestInput ("custom-config" ),
1574+ getMIGManagerTestOutput ("custom-config" ),
1575+ },
1576+ }
1577+
1578+ for _ , tc := range testCases {
1579+ t .Run (tc .description , func (t * testing.T ) {
1580+ ds , err := testDaemonsetCommon (t , tc .clusterPolicy , "MIGManager" , tc .output ["numDaemonsets" ].(int ))
1581+ if err != nil {
1582+ t .Fatalf ("error in testDaemonsetCommon(): %v" , err )
1583+ }
1584+ if ds == nil {
1585+ return
1586+ }
1587+
1588+ migManagerImage := ""
1589+ mainCtrIdx := 0
1590+ migConfigVolumePresent := false
1591+
1592+ // Find nvidia-mig-manager container and check image
1593+ for i , container := range ds .Spec .Template .Spec .Containers {
1594+ if container .Name == "nvidia-mig-manager" {
1595+ migManagerImage = container .Image
1596+ mainCtrIdx = i
1597+ break
1598+ }
1599+ }
1600+
1601+ // Check for mig-parted-config volume
1602+ for _ , vol := range ds .Spec .Template .Spec .Volumes {
1603+ if vol .Name == "mig-parted-config" {
1604+ migConfigVolumePresent = true
1605+ break
1606+ }
1607+ }
1608+
1609+ require .Equal (t , tc .output ["migManagerImage" ], migManagerImage , "Unexpected configuration for mig-manager image" )
1610+ require .Equal (t , tc .output ["migConfigVolumePresent" ], migConfigVolumePresent , "Unexpected configuration for mig-parted-config volume" )
1611+
1612+ // Check expected env vars
1613+ for key , value := range tc .output ["env" ].(map [string ]string ) {
1614+ envFound := false
1615+ for _ , envVar := range ds .Spec .Template .Spec .Containers [mainCtrIdx ].Env {
1616+ if envVar .Name == key && envVar .Value == value {
1617+ envFound = true
1618+ }
1619+ }
1620+ if ! envFound {
1621+ t .Fatalf ("Expected env is not set for daemonset mig-manager %s->%s" , key , value )
1622+ }
1623+ }
1624+
1625+ // cleanup by deleting all kubernetes objects
1626+ err = removeState (& clusterPolicyController , clusterPolicyController .idx - 1 )
1627+ if err != nil {
1628+ t .Fatalf ("error removing state %v:" , err )
1629+ }
1630+ clusterPolicyController .idx --
1631+ })
1632+ }
1633+ }
0 commit comments