Skip to content

Commit 0e9767a

Browse files
authored
refactor: ocp functions + aro gpus
1 parent bb3614b commit 0e9767a

File tree

26 files changed

+820
-501
lines changed

26 files changed

+820
-501
lines changed

.wordlist-md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ ArgoCD's
3030
argoproj
3131
arn
3232
ARN
33+
aro
34+
ARO
3335
auth
3436
authfile
3537
authorino
@@ -219,6 +221,7 @@ managementState
219221
mariadb
220222
mastersSchedulable
221223
memberof
224+
microsoft
222225
mig
223226
minio
224227
mkdir
@@ -246,6 +249,7 @@ nodeFeatureDiscovery
246249
noobaa
247250
Noobaa
248251
NuGet
252+
nv
249253
nvidia
250254
Nvidia's
251255
nw

.wordlist-sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ mutatingwebhookconfiguration
195195
namedCertificates
196196
nat
197197
nbB
198+
ncast
198199
ne
199200
newkey
200201
NGC
@@ -206,6 +207,7 @@ NoScoring
206207
ns
207208
NUM
208209
nvme
210+
nvv
209211
oadp
210212
observability
211213
observabilityaddons
@@ -270,6 +272,7 @@ servingCerts
270272
sha
271273
shellcheck
272274
SingleNamespace
275+
sizebasic
273276
skel
274277
sl
275278
sL

components/cluster-configs/autoscale/base/job.yaml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
apiVersion: rbac.authorization.k8s.io/v1
33
kind: ClusterRole
44
metadata:
5-
name: job-setup-machineset
5+
name: job-setup-autoscale
66
rules:
77
- apiGroups:
88
- machine.openshift.io
@@ -22,6 +22,7 @@ rules:
2222
- secrets
2323
resourceNames:
2424
- aws-creds
25+
- azure-credentials
2526
verbs:
2627
- get
2728
- list
@@ -33,24 +34,24 @@ rules:
3334
apiVersion: rbac.authorization.k8s.io/v1
3435
kind: ClusterRoleBinding
3536
metadata:
36-
name: job-setup-machineset
37+
name: job-setup-autoscale
3738
roleRef:
3839
apiGroup: rbac.authorization.k8s.io
3940
kind: ClusterRole
40-
name: job-setup-machineset
41+
name: job-setup-autoscale
4142
subjects:
4243
- kind: ServiceAccount
43-
name: job-setup-machineset
44+
name: job-setup-autoscale
4445
---
4546
apiVersion: v1
4647
kind: ServiceAccount
4748
metadata:
48-
name: job-setup-machineset
49+
name: job-setup-autoscale
4950
---
5051
apiVersion: batch/v1
5152
kind: Job
5253
metadata:
53-
name: job-setup-machineset
54+
name: job-setup-autoscale
5455
annotations:
5556
argocd.argoproj.io/hook: Sync
5657
# argocd.argoproj.io/hook-delete-policy: HookSucceeded
@@ -80,9 +81,9 @@ spec:
8081
volumes:
8182
- name: scripts
8283
configMap:
83-
name: job-setup-machineset
84+
name: job-setup-autoscale
8485
defaultMode: 0755
8586
restartPolicy: Never
8687
terminationGracePeriodSeconds: 30
87-
serviceAccount: job-setup-machineset
88-
serviceAccountName: job-setup-machineset
88+
serviceAccount: job-setup-autoscale
89+
serviceAccountName: job-setup-autoscale

components/cluster-configs/autoscale/base/kustomization.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ generatorOptions:
1616
disableNameSuffixHash: true
1717

1818
configMapGenerator:
19-
- name: job-setup-machineset
19+
- name: job-setup-autoscale
2020
files:
2121
- job.sh
2222
# - ../../../../../../scripts/library/ocp.sh
2323
# kludge: refer to source
24-
- https://raw.githubusercontent.com/redhat-na-ssa/demo-ai-gitops-catalog/v0.09/scripts/library/ocp.sh
24+
- https://raw.githubusercontent.com/redhat-na-ssa/demo-ai-gitops-catalog/v0.12/scripts/library/ocp.sh

components/operators/aikit-operator/instance/overlays/default/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
44

5-
namespace: stackrox
5+
namespace: redhat-ods-applications
66

77
resources:
88
- ../../base

components/operators/gpu-operator-certified/aggregate/overlays/aws/kustomization.yaml

Lines changed: 0 additions & 9 deletions
This file was deleted.

components/operators/gpu-operator-certified/instance/base/kustomization.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,5 @@ resources:
77

88
components:
99
- ../components/monitoring-dashboard
10+
- ../components/aro-gpu-machineset
11+
- ../components/aws-gpu-machineset
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# aro-gpu-machineset
2+
3+
## Purpose
4+
5+
This component is designed to set up a MachineSet with GPUs on an ARO-based OpenShift cluster.
6+
7+
This component triggers a job that creates a MachineSet based on your current MachineSet.
8+
9+
This component has been tested using ARO-based OpenShift instances provisioned by demo.redhat.com.
10+
11+
## Usage
12+
13+
This component can be added to a base by adding the `components` section to your overlay `kustomization.yaml` file:
14+
15+
```
16+
apiVersion: kustomize.config.k8s.io/v1beta1
17+
kind: Kustomization
18+
19+
resources:
20+
- ../../base
21+
22+
components:
23+
- ../../components/aro-gpu-machineset
24+
```
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
2+
3+
# shellcheck disable=SC1091
4+
. /scripts/ocp.sh
5+
6+
INSTANCE_TYPE=${INSTANCE_TYPE:-Standard_NC4as_T4_v3}
7+
8+
ocp_aro_cluster || exit 0
9+
ocp_aro_machineset_create_gpu "${INSTANCE_TYPE}"
10+
ocp_machineset_create_autoscale
11+
# ocp_machineset_taint_gpu
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
---
2+
apiVersion: v1
3+
kind: ServiceAccount
4+
metadata:
5+
name: job-aro-gpu-machineset
6+
namespace: nvidia-gpu-operator
7+
---
8+
apiVersion: rbac.authorization.k8s.io/v1
9+
kind: ClusterRole
10+
metadata:
11+
name: job-aro-gpu-machineset
12+
rules:
13+
- apiGroups:
14+
- machine.openshift.io
15+
resources:
16+
- machinesets
17+
verbs:
18+
- '*'
19+
- apiGroups:
20+
- autoscaling.openshift.io
21+
resources:
22+
- machineautoscalers
23+
verbs:
24+
- '*'
25+
- apiGroups:
26+
- ''
27+
resources:
28+
- secrets
29+
resourceNames:
30+
- azure-credentials
31+
verbs:
32+
- get
33+
- list
34+
# - nonResourceURLs:
35+
# - '*'
36+
# verbs:
37+
# - '*'
38+
---
39+
apiVersion: rbac.authorization.k8s.io/v1
40+
kind: ClusterRoleBinding
41+
metadata:
42+
name: job-aro-gpu-machineset
43+
roleRef:
44+
apiGroup: rbac.authorization.k8s.io
45+
kind: ClusterRole
46+
name: job-aro-gpu-machineset
47+
subjects:
48+
- kind: ServiceAccount
49+
name: job-aro-gpu-machineset
50+
namespace: nvidia-gpu-operator
51+
---
52+
apiVersion: batch/v1
53+
kind: Job
54+
metadata:
55+
generateName: job-aro-gpu-machineset-
56+
name: job-aro-gpu-machineset
57+
namespace: nvidia-gpu-operator
58+
# annotations:
59+
# argocd.argoproj.io/hook: Sync
60+
# argocd.argoproj.io/hook-delete-policy: HookSucceeded
61+
spec:
62+
template:
63+
spec:
64+
containers:
65+
- name: job-aro-gpu-machineset
66+
# image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest
67+
image: registry.redhat.io/openshift4/ose-cli
68+
env:
69+
- name: INSTANCE_TYPE
70+
value: "Standard_NC4as_T4_v3"
71+
- name: NAMESPACE
72+
valueFrom:
73+
fieldRef:
74+
fieldPath: metadata.namespace
75+
command:
76+
- /bin/bash
77+
- -c
78+
- /scripts/job.sh
79+
volumeMounts:
80+
- name: scripts
81+
mountPath: /scripts
82+
volumes:
83+
- name: scripts
84+
configMap:
85+
name: job-aro-gpu-machineset
86+
defaultMode: 0755
87+
restartPolicy: Never
88+
terminationGracePeriodSeconds: 30
89+
serviceAccount: job-aro-gpu-machineset
90+
serviceAccountName: job-aro-gpu-machineset

0 commit comments

Comments
 (0)