Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions args.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ const (
ArgEnableAmdGpuDevicePlugin = "enable-amd-gpu-device-plugin"
// ArgEnableAmdGpuDeviceMetricsExporterPlugin enables automatic amd gpu device metrics exporter plugin installation.
ArgEnableAmdGpuDeviceMetricsExporterPlugin = "enable-amd-gpu-device-metrics-exporter-plugin"
// ArgEnableNvidiaGpuDevicePlugin enables automatic NVIDIA gpu device plugin installation.
ArgEnableNvidiaGpuDevicePlugin = "enable-nvidia-gpu-device-plugin"
// ArgEnableRDMASharedDevicePlugin enables automatic RDMA shared device plugin installation.
ArgEnableRDMASharedDevicePlugin = "enable-rdma-shared-device-plugin"
// ArgSurgeUpgrade is a cluster's surge-upgrade argument.
ArgSurgeUpgrade = "surge-upgrade"
// ArgCommandUpsert is an upsert for a resource to be created or updated argument.
Expand Down
6 changes: 6 additions & 0 deletions commands/displayers/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func (clusters *KubernetesClusters) Cols() []string {
"RoutingAgent",
"AmdGpuDevicePlugin",
"AmdGpuDeviceMetricsExporterPlugin",
"NvidiaGpuDevicePlugin",
"RDMASharedDevicePlugin",
}
}

Expand Down Expand Up @@ -91,6 +93,8 @@ func (clusters *KubernetesClusters) ColMap() map[string]string {
"RoutingAgent": "Routing Agent",
"AmdGpuDevicePlugin": "AMD GPU Device Plugin",
"AmdGpuDeviceMetricsExporterPlugin": "AMD GPU Device Metrics Exporter Plugin",
"NvidiaGpuDevicePlugin": "NVIDIA GPU Device Plugin",
"RDMASharedDevicePlugin": "RDMA Shared Device Plugin",
}
}

Expand Down Expand Up @@ -129,6 +133,8 @@ func (clusters *KubernetesClusters) KV() []map[string]any {
"RoutingAgent": cluster.RoutingAgent != nil && *cluster.RoutingAgent.Enabled,
"AmdGpuDevicePlugin": cluster.AmdGpuDevicePlugin != nil && *cluster.AmdGpuDevicePlugin.Enabled,
"AmdGpuDeviceMetricsExporterPlugin": cluster.AmdGpuDeviceMetricsExporterPlugin != nil && *cluster.AmdGpuDeviceMetricsExporterPlugin.Enabled,
"NvidiaGpuDevicePlugin": cluster.NvidiaGpuDevicePlugin != nil && *cluster.NvidiaGpuDevicePlugin.Enabled,
"RDMASharedDevicePlugin": cluster.RdmaSharedDevicePlugin != nil && *cluster.RdmaSharedDevicePlugin.Enabled,
}

if cfg := cluster.ClusterAutoscalerConfiguration; cfg != nil {
Expand Down
69 changes: 68 additions & 1 deletion commands/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,10 @@ After creating a cluster, a configuration context is added to kubectl and made a
"Creates the cluster with amd gpu device plugin installed. Defaults to true for clusters with AMD GPUs and otherwise false. To always enable it, supply --enable-amd-gpu-device-plugin=true.")
AddBoolFlag(cmdKubeClusterCreate, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, "", false,
"Creates the cluster with amd gpu device metrics exporter plugin installed. Defaults to false. To enable it, supply --enable-amd-gpu-device-metrics-exporter-plugin=true.")
AddBoolFlag(cmdKubeClusterCreate, doctl.ArgEnableNvidiaGpuDevicePlugin, "", false,
"Creates the cluster with nvidia gpu device plugin installed. Defaults to true for clusters with NVIDIA GPUs and otherwise false. To always enable it, supply --enable-nvidia-gpu-device-plugin=true.")
AddBoolFlag(cmdKubeClusterCreate, doctl.ArgEnableRDMASharedDevicePlugin, "", false,
"Creates the cluster with k8s-rdma-shared-dev-plugin device plugin installed. Defaults to true for clusters with GPU nodes connected to a dedicated high-speed networking fabric. To always enable it, supply --enable-rdma-shared-device-plugin=true.")
AddStringSliceFlag(cmdKubeClusterCreate, doctl.ArgTag, "", nil,
"A comma-separated list of `tags` to apply to the cluster, in addition to the default tags of `k8s` and `k8s:$K8S_CLUSTER_ID`.")
AddStringFlag(cmdKubeClusterCreate, doctl.ArgSizeSlug, "",
Expand Down Expand Up @@ -357,6 +361,10 @@ Updates the configuration values for a Kubernetes cluster. The cluster must be r
"Creates the cluster with amd gpu device plugin installed. Defaults to true for clusters with AMD GPUs and otherwise false. To always enable it, supply --enable-amd-gpu-device-plugin=true.")
AddBoolFlag(cmdKubeClusterUpdate, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, "", false,
"Creates the cluster with amd gpu device metrics exporter plugin installed. Defaults to false. To enable it, supply --enable-amd-gpu-device-metrics-exporter-plugin=true.")
AddBoolFlag(cmdKubeClusterUpdate, doctl.ArgEnableNvidiaGpuDevicePlugin, "", false,
"Creates the cluster with nvidia gpu device plugin installed. Defaults to true for clusters with NVIDIA GPUs and otherwise false. To always enable it, supply --enable-nvidia-gpu-device-plugin=true.")
AddBoolFlag(cmdKubeClusterUpdate, doctl.ArgEnableRDMASharedDevicePlugin, "", false,
"Creates the cluster with k8s-rdma-shared-dev-plugin device plugin installed. Defaults to true for clusters with GPU nodes connected to a dedicated high-speed networking fabric. To always enable it, supply --enable-rdma-shared-device-plugin=true.")
AddStringFlag(cmdKubeClusterUpdate, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold, "", "",
"The threshold value for the cluster autoscaler's scale-down-utilization-threshold. It is the maximum value between the sum of CPU requests and sum of memory requests of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down. To set the scale-down-utilization-threshold to 50%, pass the floating point value 0.5.")
AddStringFlag(cmdKubeClusterUpdate, doctl.ArgClusterAutoscalerScaleDownUnneededTime, "", "",
Expand Down Expand Up @@ -1726,7 +1734,6 @@ func buildClusterCreateRequestFromArgs(c *CmdConfig, r *godo.KubernetesClusterCr

// We need to differentiate here if the option is set or not, as it defaults to a different value on the server-side
// depending on whether there are AMD GPU nodes in the cluster or not.
//
// If we would always send "false", even if the flag isn't set, we would essentially disable the defaulting.
if c.Doit.IsSet(doctl.ArgEnableAmdGpuDevicePlugin) {
enableAmdGpuDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableAmdGpuDevicePlugin)
Expand All @@ -1750,6 +1757,36 @@ func buildClusterCreateRequestFromArgs(c *CmdConfig, r *godo.KubernetesClusterCr
}
}

// Only send this option when the flag was explicitly set: the server-side default differs
// depending on whether there are NVIDIA GPU nodes in the cluster or not.
// If we always sent "false", even when the flag isn't set, we would effectively disable that defaulting.
if c.Doit.IsSet(doctl.ArgEnableNvidiaGpuDevicePlugin) {
enableNvidiaGpuDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableNvidiaGpuDevicePlugin)
if err != nil {
return err
}
if enableNvidiaGpuDevicePlugin != nil {
r.NvidiaGpuDevicePlugin = &godo.KubernetesNvidiaGpuDevicePlugin{
Enabled: enableNvidiaGpuDevicePlugin,
}
}
}

// Only send this option when the flag was explicitly set: the server-side default differs
// depending on whether there are fabric-connected GPU nodes in the cluster or not.
// If we always sent "false", even when the flag isn't set, we would effectively disable that defaulting.
if c.Doit.IsSet(doctl.ArgEnableRDMASharedDevicePlugin) {
enableRDMASharedDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableRDMASharedDevicePlugin)
if err != nil {
return err
}
if enableRDMASharedDevicePlugin != nil {
r.RdmaSharedDevicePlugin = &godo.KubernetesRdmaSharedDevicePlugin{
Enabled: enableRDMASharedDevicePlugin,
}
}
}

var clusterAutoscalerConfiguration = &godo.KubernetesClusterAutoscalerConfiguration{}
thresholdStr, err := c.Doit.GetString(c.NS, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold)
if err != nil {
Expand Down Expand Up @@ -1933,6 +1970,36 @@ func buildClusterUpdateRequestFromArgs(c *CmdConfig, r *godo.KubernetesClusterUp
}
}

// Only send this option when the flag was explicitly set: the server-side default differs
// depending on whether there are NVIDIA GPU nodes in the cluster or not.
// If we always sent "false", even when the flag isn't set, we would effectively disable that defaulting.
if c.Doit.IsSet(doctl.ArgEnableNvidiaGpuDevicePlugin) {
enableNvidiaGpuDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableNvidiaGpuDevicePlugin)
if err != nil {
return err
}
if enableNvidiaGpuDevicePlugin != nil {
r.NvidiaGpuDevicePlugin = &godo.KubernetesNvidiaGpuDevicePlugin{
Enabled: enableNvidiaGpuDevicePlugin,
}
}
}

// Only send this option when the flag was explicitly set: the server-side default differs
// depending on whether there are fabric-connected GPU nodes in the cluster or not.
// If we always sent "false", even when the flag isn't set, we would effectively disable that defaulting.
if c.Doit.IsSet(doctl.ArgEnableRDMASharedDevicePlugin) {
enableRDMASharedDevicePlugin, err := c.Doit.GetBoolPtr(c.NS, doctl.ArgEnableRDMASharedDevicePlugin)
if err != nil {
return err
}
if enableRDMASharedDevicePlugin != nil {
r.RdmaSharedDevicePlugin = &godo.KubernetesRdmaSharedDevicePlugin{
Enabled: enableRDMASharedDevicePlugin,
}
}
}

var clusterAutoscalerConfiguration = &godo.KubernetesClusterAutoscalerConfiguration{}
thresholdStr, err := c.Doit.GetString(c.NS, doctl.ArgClusterAutoscalerScaleDownUtilizationThreshold)
if err != nil {
Expand Down
22 changes: 22 additions & 0 deletions commands/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ var (
AmdGpuDeviceMetricsExporterPlugin: &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
Enabled: boolPtr(true),
},
NvidiaGpuDevicePlugin: &godo.KubernetesNvidiaGpuDevicePlugin{
Enabled: boolPtr(true),
},
RdmaSharedDevicePlugin: &godo.KubernetesRdmaSharedDevicePlugin{
Enabled: boolPtr(true),
},
},
}

Expand Down Expand Up @@ -541,6 +547,12 @@ func TestKubernetesCreate(t *testing.T) {
AmdGpuDeviceMetricsExporterPlugin: &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
Enabled: boolPtr(true),
},
NvidiaGpuDevicePlugin: &godo.KubernetesNvidiaGpuDevicePlugin{
Enabled: boolPtr(true),
},
RdmaSharedDevicePlugin: &godo.KubernetesRdmaSharedDevicePlugin{
Enabled: boolPtr(true),
},
}
tm.kubernetes.EXPECT().Create(&r).Return(&testCluster, nil)

Expand Down Expand Up @@ -570,6 +582,8 @@ func TestKubernetesCreate(t *testing.T) {
config.Doit.Set(config.NS, doctl.ArgEnableRoutingAgent, testCluster.RoutingAgent.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDevicePlugin, testCluster.AmdGpuDevicePlugin.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, testCluster.AmdGpuDeviceMetricsExporterPlugin.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableNvidiaGpuDevicePlugin, testCluster.NvidiaGpuDevicePlugin.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableRDMASharedDevicePlugin, testCluster.RdmaSharedDevicePlugin.Enabled)

// Test with no vpc-uuid specified
err := testK8sCmdService().RunKubernetesClusterCreate("c-8", 3)(config)
Expand Down Expand Up @@ -637,6 +651,12 @@ func TestKubernetesUpdate(t *testing.T) {
AmdGpuDeviceMetricsExporterPlugin: &godo.KubernetesAmdGpuDeviceMetricsExporterPlugin{
Enabled: boolPtr(true),
},
NvidiaGpuDevicePlugin: &godo.KubernetesNvidiaGpuDevicePlugin{
Enabled: boolPtr(true),
},
RdmaSharedDevicePlugin: &godo.KubernetesRdmaSharedDevicePlugin{
Enabled: boolPtr(true),
},
}
tm.kubernetes.EXPECT().Update(testCluster.ID, &r).Return(&testCluster, nil)

Expand All @@ -653,6 +673,8 @@ func TestKubernetesUpdate(t *testing.T) {
config.Doit.Set(config.NS, doctl.ArgEnableRoutingAgent, testCluster.RoutingAgent.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDevicePlugin, testCluster.AmdGpuDevicePlugin.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableAmdGpuDeviceMetricsExporterPlugin, testCluster.AmdGpuDeviceMetricsExporterPlugin.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableNvidiaGpuDevicePlugin, testCluster.NvidiaGpuDevicePlugin.Enabled)
config.Doit.Set(config.NS, doctl.ArgEnableRDMASharedDevicePlugin, testCluster.RdmaSharedDevicePlugin.Enabled)

err := testK8sCmdService().RunKubernetesClusterUpdate(config)
assert.NoError(t, err)
Expand Down
4 changes: 2 additions & 2 deletions integration/kubernetes_clusters_get_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ var (
`

k8sGetOutput = `
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent AMD GPU Device Plugin AMD GPU Device Metrics Exporter Plugin
some-cluster-id some-cluster-id nyc3 some-kube-version true false running production 2018-11-15 16:00:11 +0000 UTC 2018-11-15 16:00:11 +0000 UTC frontend-pool 50% 1m30s priority, random false false false
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent AMD GPU Device Plugin AMD GPU Device Metrics Exporter Plugin NVIDIA GPU Device Plugin RDMA Shared Device Plugin
some-cluster-id some-cluster-id nyc3 some-kube-version true false running production 2018-11-15 16:00:11 +0000 UTC 2018-11-15 16:00:11 +0000 UTC frontend-pool 50% 1m30s priority, random false false false false false
`
)
4 changes: 2 additions & 2 deletions integration/projects_resources_get_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ ID Name Size Region Filesyste
}
`
projectsResourcesGetKubernetesOutput = `
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent AMD GPU Device Plugin AMD GPU Device Metrics Exporter Plugin
1111 false false provisioning k8s 2021-01-29 16:02:02 +0000 UTC 0001-01-01 00:00:00 +0000 UTC pool-test false false false
ID Name Region Version Auto Upgrade HA Control Plane Status Endpoint IPv4 Cluster Subnet Service Subnet Tags Created At Updated At Node Pools Autoscaler Scale Down Utilization Autoscaler Scale Down Unneeded Time Autoscaler Custom Expanders Routing Agent AMD GPU Device Plugin AMD GPU Device Metrics Exporter Plugin NVIDIA GPU Device Plugin RDMA Shared Device Plugin
1111 false false provisioning k8s 2021-01-29 16:02:02 +0000 UTC 0001-01-01 00:00:00 +0000 UTC pool-test false false false false false
`

projectsResourcesListKubernetesOutput = `
Expand Down
Loading