Skip to content

Commit d822819

Browse files
committed
Fix dra driver installation
Signed-off-by: Erez Zarum <[email protected]>
1 parent d72b5e3 commit d822819

File tree

3 files changed

+67
-1
lines changed

3 files changed

+67
-1
lines changed

demo/clusters/eks/delete-cluster.sh

100644 → 100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ DRIVER_NAME=$(from_versions_mk "DRIVER_NAME")
2020
export CLUSTER_NAME
2121
export AWS_REGION
2222

23-
# Delete EKS cluster using eksctl
23+
## Delete EKS cluster using eksctl
2424
# CLUSTER_NAME and AWS_REGION are quoted so that an empty or
# whitespace-containing value is still passed as exactly one argument;
# unquoted, an empty CLUSTER_NAME would vanish and --name would consume
# "--region" as its value.
eksctl delete cluster --name "${CLUSTER_NAME}" --region "${AWS_REGION}" --wait

demo/clusters/eks/install-dra-driver-gpu.sh

100644 → 100755
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/bin/bash
22

3+
CURRENT_DIR="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"
34
PROJECT_DIR="$(cd -- "$( dirname -- "${CURRENT_DIR}/../../../.." )" &> /dev/null && pwd)"
45

56
helm upgrade -i --create-namespace --namespace nvidia-dra-driver-gpu nvidia-dra-driver-gpu ${PROJECT_DIR}/deployments/helm/nvidia-dra-driver-gpu \
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
apiVersion: eksctl.io/v1alpha5
2+
kind: ClusterConfig
3+
metadata:
4+
name: k8s-dra-driver-gpu-cluster
5+
region: us-east-2
6+
version: "1.33"
7+
availabilityZones:
8+
- us-east-2a
9+
- us-east-2b
10+
- us-east-2c
11+
managedNodeGroups:
12+
- name: system
13+
amiFamily: AmazonLinux2023
14+
instanceType: m6g.large
15+
privateNetworking: true
16+
desiredCapacity: 2
17+
minSize: 2
18+
maxSize: 4
19+
labels:
20+
role: system
21+
taints:
22+
- key: CriticalAddonsOnly
23+
effect: NoSchedule
24+
updateConfig:
25+
maxUnavailable: 1
26+
- name: gpu
27+
amiFamily: AmazonLinux2023
28+
instanceType: g6e.4xlarge
29+
privateNetworking: true
30+
desiredCapacity: 1
31+
minSize: 1
32+
maxSize: 4
33+
labels:
34+
role: gpu
35+
nvidia.com/gpu.present: "true"
36+
taints:
37+
- key: nvidia.com/gpu
38+
effect: NoSchedule
39+
updateConfig:
40+
maxUnavailable: 1
41+
overrideBootstrapCommand: |
42+
apiVersion: node.eks.aws/v1alpha1
43+
kind: NodeConfig
44+
spec:
45+
kubelet:
46+
config:
47+
featureGates:
48+
DynamicResourceAllocation: true
49+
cloudWatch:
50+
clusterLogging:
51+
enableTypes: ["*"]
52+
addons:
53+
- name: vpc-cni
54+
version: latest
55+
- name: coredns
56+
version: latest
57+
configurationValues: |-
58+
nodeSelector:
59+
role: system
60+
- name: eks-pod-identity-agent
61+
version: latest
62+
- name: kube-proxy
63+
version: latest
64+
iam:
65+
withOIDC: true

0 commit comments

Comments
 (0)