Skip to content

Commit 3fbacf0

Browse files
committed
cluster-api: node template in scale-from-0-nodes scenario with DRA
Modify TemplateNodeInfo() to also return a template ResourceSlice. This supports Dynamic Resource Allocation (DRA) in Cluster Autoscaler by letting users set the number of GPUs and the DRA driver name through annotations on the NodeGroup provided by cluster-api. Signed-off-by: Tsubasa Watanabe <[email protected]>
1 parent 08e7250 commit 3fbacf0

6 files changed

+158
-7
lines changed

cluster-autoscaler/cloudprovider/clusterapi/README.md

+7-1
Original file line numberDiff line numberDiff line change
@@ -223,15 +223,21 @@ metadata:
223223
capacity.cluster-autoscaler.kubernetes.io/memory: "128G"
224224
capacity.cluster-autoscaler.kubernetes.io/cpu: "16"
225225
capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk: "100Gi"
226+
capacity.cluster-autoscaler.kubernetes.io/maxPods: "200"
227+
// Device Plugin
226228
capacity.cluster-autoscaler.kubernetes.io/gpu-type: "nvidia.com/gpu"
229+
// Dynamic Resource Allocation (DRA)
230+
capacity.cluster-autoscaler.kubernetes.io/dra-driver: "gpu.nvidia.com"
231+
// Common in Device Plugin and DRA
227232
capacity.cluster-autoscaler.kubernetes.io/gpu-count: "2"
228-
capacity.cluster-autoscaler.kubernetes.io/maxPods: "200"
229233
```
230234
231235
*Note* the `maxPods` annotation will default to `110` if it is not supplied.
232236
This value is inspired by the Kubernetes best practices
233237
[Considerations for large clusters](https://kubernetes.io/docs/setup/best-practices/cluster-large/).
234238

239+
*Note* Users should set either the `gpu-type` or the `dra-driver` annotation for GPUs, depending on whether they use a Device Plugin or Dynamic Resource Allocation (DRA). `gpu-count` is a common parameter for both.
240+
235241
#### RBAC changes for scaling from zero
236242

237243
If you are using the opt-in support for scaling from zero as defined by the

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,12 @@ func (ng *nodegroup) TemplateNodeInfo() (*framework.NodeInfo, error) {
283283
return nil, err
284284
}
285285

286-
nodeInfo := framework.NewNodeInfo(&node, nil, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
286+
resourceSlices, err := ng.scalableResource.InstanceResourceSlices(nodeName)
287+
if err != nil {
288+
return nil, err
289+
}
290+
291+
nodeInfo := framework.NewNodeInfo(&node, resourceSlices, &framework.PodInfo{Pod: cloudprovider.BuildKubeProxy(ng.scalableResource.Name())})
287292
return nodeInfo, nil
288293
}
289294

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go

+51-5
Original file line numberDiff line numberDiff line change
@@ -1309,12 +1309,19 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
13091309
nodeGroupMaxSizeAnnotationKey: "10",
13101310
}
13111311

1312+
type testResourceSlice struct {
1313+
driverName string
1314+
gpuCount int
1315+
deviceType string
1316+
}
1317+
13121318
type testCaseConfig struct {
1313-
nodeLabels map[string]string
1314-
includeNodes bool
1315-
expectedErr error
1316-
expectedCapacity map[corev1.ResourceName]int64
1317-
expectedNodeLabels map[string]string
1319+
nodeLabels map[string]string
1320+
includeNodes bool
1321+
expectedErr error
1322+
expectedCapacity map[corev1.ResourceName]int64
1323+
expectedNodeLabels map[string]string
1324+
expectedResourceSlice testResourceSlice
13181325
}
13191326

13201327
testCases := []struct {
@@ -1407,6 +1414,33 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
14071414
},
14081415
},
14091416
},
1417+
{
1418+
name: "When the NodeGroup can scale from zero and DRA is enabled, it creates ResourceSlice derived from the annotation of DRA driver name and GPU count",
1419+
nodeGroupAnnotations: map[string]string{
1420+
memoryKey: "2048Mi",
1421+
cpuKey: "2",
1422+
draDriverKey: "gpu.nvidia.com",
1423+
gpuCountKey: "2",
1424+
},
1425+
config: testCaseConfig{
1426+
expectedErr: nil,
1427+
expectedCapacity: map[corev1.ResourceName]int64{
1428+
corev1.ResourceCPU: 2,
1429+
corev1.ResourceMemory: 2048 * 1024 * 1024,
1430+
corev1.ResourcePods: 110,
1431+
},
1432+
expectedResourceSlice: testResourceSlice{
1433+
driverName: "gpu.nvidia.com",
1434+
gpuCount: 2,
1435+
deviceType: GpuDeviceType,
1436+
},
1437+
expectedNodeLabels: map[string]string{
1438+
"kubernetes.io/os": "linux",
1439+
"kubernetes.io/arch": "amd64",
1440+
"kubernetes.io/hostname": "random value",
1441+
},
1442+
},
1443+
},
14101444
}
14111445

14121446
test := func(t *testing.T, testConfig *testConfig, config testCaseConfig) {
@@ -1470,6 +1504,18 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
14701504
}
14711505
}
14721506
}
1507+
for _, resourceslice := range nodeInfo.LocalResourceSlices {
1508+
if resourceslice.Spec.Driver != config.expectedResourceSlice.driverName {
1509+
t.Errorf("Expected DRA driver in ResourceSlice to have: %s, but got: %s", config.expectedResourceSlice.driverName, resourceslice.Spec.Driver)
1510+
} else if len(resourceslice.Spec.Devices) != config.expectedResourceSlice.gpuCount {
1511+
t.Errorf("Expected the number of DRA devices in ResourceSlice to have: %d, but got: %d", config.expectedResourceSlice.gpuCount, len(resourceslice.Spec.Devices))
1512+
}
1513+
for _, device := range resourceslice.Spec.Devices {
1514+
if *device.Basic.Attributes["type"].StringValue != config.expectedResourceSlice.deviceType {
1515+
t.Errorf("Expected device type to have: %s, but got: %s", config.expectedResourceSlice.deviceType, *device.Basic.Attributes["type"].StringValue)
1516+
}
1517+
}
1518+
}
14731519
}
14741520

14751521
for _, tc := range testCases {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go

+47
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,21 @@ import (
2020
"context"
2121
"fmt"
2222
"path"
23+
"strconv"
2324
"strings"
2425
"time"
2526

2627
"github.com/pkg/errors"
2728
apiv1 "k8s.io/api/core/v1"
2829
corev1 "k8s.io/api/core/v1"
30+
resourceapi "k8s.io/api/resource/v1beta1"
2931
"k8s.io/apimachinery/pkg/api/resource"
3032
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3133
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3234
"k8s.io/apimachinery/pkg/runtime/schema"
3335
"k8s.io/apimachinery/pkg/util/validation"
3436
klog "k8s.io/klog/v2"
37+
"k8s.io/utils/ptr"
3538
)
3639

3740
type unstructuredScalableResource struct {
@@ -297,6 +300,46 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
297300
return capacity, nil
298301
}
299302

303+
// InstanceResourceSlices builds the template ResourceSlices for a node named
// nodeName from the scalable resource's annotations.
//
// It reads the DRA driver name via InstanceDRADriver and the GPU count via
// InstanceGPUCapacityAnnotation. When the driver name is non-empty and the GPU
// count is non-zero, it returns a single ResourceSlice named
// "<nodeName>-<driver>" whose pool is named after the node and which contains
// one device per GPU ("gpu-0", "gpu-1", ...), each carrying a "type" attribute
// set to GpuDeviceType. Otherwise it returns nil, nil.
//
// An error from InstanceGPUCapacityAnnotation is returned unchanged.
func (r unstructuredScalableResource) InstanceResourceSlices(nodeName string) ([]*resourceapi.ResourceSlice, error) {
304+
driver := r.InstanceDRADriver()
305+
gpuCount, err := r.InstanceGPUCapacityAnnotation()
306+
if err != nil {
307+
return nil, err
308+
}
309+
310+
var result []*resourceapi.ResourceSlice
311+
// Only emit a ResourceSlice when both a driver name and a non-zero GPU count are present.
if driver != "" && !gpuCount.IsZero() {
312+
resourceslice := &resourceapi.ResourceSlice{
313+
ObjectMeta: metav1.ObjectMeta{
314+
Name: nodeName + "-" + driver,
315+
},
316+
Spec: resourceapi.ResourceSliceSpec{
317+
Driver: driver,
318+
NodeName: nodeName,
319+
Pool: resourceapi.ResourcePool{
320+
Name: nodeName,
321+
},
322+
},
323+
}
324+
// Add one device entry per GPU, named by its zero-based index.
for i := 0; i < int(gpuCount.Value()); i++ {
325+
device := resourceapi.Device{
326+
Name: "gpu-" + strconv.Itoa(i),
327+
Basic: &resourceapi.BasicDevice{
328+
Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
329+
"type": {
330+
StringValue: ptr.To(GpuDeviceType),
331+
},
332+
},
333+
},
334+
}
335+
resourceslice.Spec.Devices = append(resourceslice.Spec.Devices, device)
336+
}
337+
result = append(result, resourceslice)
338+
return result, nil
339+
}
340+
// No driver annotation or zero GPUs: there is nothing to describe.
return nil, nil
341+
}
342+
300343
func (r unstructuredScalableResource) InstanceEphemeralDiskCapacityAnnotation() (resource.Quantity, error) {
301344
return parseEphemeralDiskCapacity(r.unstructured.GetAnnotations())
302345
}
@@ -321,6 +364,10 @@ func (r unstructuredScalableResource) InstanceMaxPodsCapacityAnnotation() (resou
321364
return parseMaxPodsCapacity(r.unstructured.GetAnnotations())
322365
}
323366

367+
// InstanceDRADriver returns the DRA driver name read from the annotations of
// the underlying unstructured resource, as extracted by parseDRADriver.
func (r unstructuredScalableResource) InstanceDRADriver() string {
368+
return parseDRADriver(r.unstructured.GetAnnotations())
369+
}
370+
324371
func (r unstructuredScalableResource) readInfrastructureReferenceResource() (*unstructured.Unstructured, error) {
325372
infraref, found, err := unstructured.NestedStringMap(r.unstructured.Object, "spec", "template", "spec", "infrastructureRef")
326373
if !found || err != nil {

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go

+37
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@ import (
2424

2525
"github.com/stretchr/testify/assert"
2626
v1 "k8s.io/api/core/v1"
27+
resourceapi "k8s.io/api/resource/v1beta1"
2728
"k8s.io/apimachinery/pkg/api/resource"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3031
"k8s.io/client-go/tools/cache"
32+
"k8s.io/utils/ptr"
3133
)
3234

3335
const (
@@ -297,6 +299,32 @@ func TestAnnotations(t *testing.T) {
297299
gpuQuantity := resource.MustParse("1")
298300
maxPodsQuantity := resource.MustParse("42")
299301
expectedTaints := []v1.Taint{{Key: "key1", Effect: v1.TaintEffectNoSchedule, Value: "value1"}, {Key: "key2", Effect: v1.TaintEffectNoExecute, Value: "value2"}}
302+
testNodeName := "test-node"
303+
draDriver := "test-driver"
304+
expectedResourceSlice := &resourceapi.ResourceSlice{
305+
ObjectMeta: metav1.ObjectMeta{
306+
Name: testNodeName + "-" + draDriver,
307+
},
308+
Spec: resourceapi.ResourceSliceSpec{
309+
Driver: draDriver,
310+
NodeName: testNodeName,
311+
Pool: resourceapi.ResourcePool{
312+
Name: testNodeName,
313+
},
314+
Devices: []resourceapi.Device{
315+
{
316+
Name: "gpu-0",
317+
Basic: &resourceapi.BasicDevice{
318+
Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
319+
"type": {
320+
StringValue: ptr.To(GpuDeviceType),
321+
},
322+
},
323+
},
324+
},
325+
},
326+
},
327+
}
300328
annotations := map[string]string{
301329
cpuKey: cpuQuantity.String(),
302330
memoryKey: memQuantity.String(),
@@ -305,6 +333,7 @@ func TestAnnotations(t *testing.T) {
305333
maxPodsKey: maxPodsQuantity.String(),
306334
taintsKey: "key1=value1:NoSchedule,key2=value2:NoExecute",
307335
labelsKey: "key3=value3,key4=value4,key5=value5",
336+
draDriverKey: draDriver,
308337
}
309338

310339
test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) {
@@ -346,6 +375,14 @@ func TestAnnotations(t *testing.T) {
346375
t.Errorf("expected %v, got %v", maxPodsQuantity, maxPods)
347376
}
348377

378+
if resourceSlices, err := sr.InstanceResourceSlices(testNodeName); err != nil {
379+
t.Fatal(err)
380+
} else {
381+
for _, resourceslice := range resourceSlices {
382+
assert.Equal(t, expectedResourceSlice, resourceslice)
383+
}
384+
}
385+
349386
taints := sr.Taints()
350387
assert.Equal(t, expectedTaints, taints)
351388

cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go

+10
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const (
4040
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
4141
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
4242
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
43+
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
4344
// UnknownArch is used if the Architecture is Unknown
4445
UnknownArch SystemArchitecture = ""
4546
// Amd64 is used if the Architecture is x86_64
@@ -54,6 +55,8 @@ const (
5455
DefaultArch = Amd64
5556
// scaleUpFromZeroDefaultEnvVar is the name of the env var for the default architecture
5657
scaleUpFromZeroDefaultArchEnvVar = "CAPI_SCALE_ZERO_DEFAULT_ARCH"
58+
// GpuDeviceType is used if DRA device is GPU
59+
GpuDeviceType = "gpu"
5760
)
5861

5962
var (
@@ -282,6 +285,13 @@ func parseMaxPodsCapacity(annotations map[string]string) (resource.Quantity, err
282285
return parseIntKey(annotations, maxPodsKey)
283286
}
284287

288+
// parseDRADriver returns the value stored under draDriverKey in annotations,
// or the empty string when that annotation is absent. The value is returned
// as-is, without validation.
func parseDRADriver(annotations map[string]string) string {
289+
if val, found := annotations[draDriverKey]; found {
290+
return val
291+
}
292+
return ""
293+
}
294+
285295
func clusterNameFromResource(r *unstructured.Unstructured) string {
286296
// Use Spec.ClusterName if defined (only available on v1alpha3+ types)
287297
clusterName, found, err := unstructured.NestedString(r.Object, "spec", "clusterName")

0 commit comments

Comments
 (0)