diff --git a/docs/examples/jobset/jobset.md b/docs/examples/jobset/jobset.md index e809f36f..060df2ac 100644 --- a/docs/examples/jobset/jobset.md +++ b/docs/examples/jobset/jobset.md @@ -10,7 +10,8 @@ Install [JobSet API](https://github.com/kubernetes-sigs/jobset) in your cluster: ```shell -kubectl apply --server-side -f https://github.com/kubernetes-sigs/jobset/releases/download/v0.5.2/manifests.yaml +JOBSET_VERSION=v0.8.1 +kubectl apply --server-side -f https://github.com/kubernetes-sigs/jobset/releases/download/${JOBSET_VERSION}/manifests.yaml ``` Run a jobset with workers: diff --git a/docs/examples/kai/kai.md b/docs/examples/kai/kai.md index a2f2a277..2cf6efb0 100644 --- a/docs/examples/kai/kai.md +++ b/docs/examples/kai/kai.md @@ -1,10 +1,15 @@ ## Example of running `KAI` with `knavigator` -### Running workflows with `MPI job` +### Running workflows with `MPI job` and `Job` Install [KAI scheduler](https://github.com/NVIDIA/KAI-Scheduler/blob/main/README.md) in your cluster. -Run an MPI job: +Run an MPI job: ```shell ./bin/knavigator -workflow resources/workflows/kai/test-mpijob.yaml ``` + +Run a multi-replica Job: +```shell +./bin/knavigator -workflow resources/workflows/kai/test-job.yaml +``` diff --git a/docs/examples/kueue/kueue.md b/docs/examples/kueue/kueue.md index f3a25d0e..0e991101 100644 --- a/docs/examples/kueue/kueue.md +++ b/docs/examples/kueue/kueue.md @@ -3,7 +3,7 @@ Install `kueue` by following these [instructions](https://kueue.sigs.k8s.io/docs/installation/): ```bash -KUEUE_VERSION=v0.9.0 +KUEUE_VERSION=v0.11.4 kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml kubectl apply -f charts/overrides/kueue/priority.yaml diff --git a/resources/benchmarks/gang-scheduling/workflows/config-kai.yaml b/resources/benchmarks/gang-scheduling/workflows/config-kai.yaml new file mode 100644 index 00000000..0a164b8b --- /dev/null +++ 
b/resources/benchmarks/gang-scheduling/workflows/config-kai.yaml @@ -0,0 +1,43 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: test-kai-job +description: register, deploy and configure kai custom resources +tasks: +- id: register-queue + type: RegisterObj + params: + template: "resources/benchmarks/templates/kai/queue.yaml" +- id: register + type: RegisterObj + params: + template: "resources/benchmarks/templates/kai/job.yaml" + nameFormat: "job{{._ENUM_}}" + podNameFormat: "{{._NAME_}}-[a-z0-9]+" + podCount: "{{.replicas}}" +- id: default-queue + type: SubmitObj + params: + refTaskId: register-queue + canExist: true + params: + name: default +- id: test-queue + type: SubmitObj + params: + refTaskId: register-queue + canExist: true + params: + name: test + parentQueue: default diff --git a/resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml b/resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml index 8ed0315d..69f6f11b 100644 --- a/resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml +++ b/resources/benchmarks/gang-scheduling/workflows/config-kueue.yaml @@ -118,7 +118,6 @@ tasks: - "ray.io/rayjob" - "ray.io/raycluster" - "jobset.x-k8s.io/jobset" - - "kubeflow.org/mxjob" - "kubeflow.org/paddlejob" - "kubeflow.org/pytorchjob" - "kubeflow.org/tfjob" diff --git a/resources/benchmarks/scaling/workflows/config-kai.yaml 
b/resources/benchmarks/scaling/workflows/config-kai.yaml new file mode 100644 index 00000000..0a164b8b --- /dev/null +++ b/resources/benchmarks/scaling/workflows/config-kai.yaml @@ -0,0 +1,43 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: test-kai-job +description: register, deploy and configure kai custom resources +tasks: +- id: register-queue + type: RegisterObj + params: + template: "resources/benchmarks/templates/kai/queue.yaml" +- id: register + type: RegisterObj + params: + template: "resources/benchmarks/templates/kai/job.yaml" + nameFormat: "job{{._ENUM_}}" + podNameFormat: "{{._NAME_}}-[a-z0-9]+" + podCount: "{{.replicas}}" +- id: default-queue + type: SubmitObj + params: + refTaskId: register-queue + canExist: true + params: + name: default +- id: test-queue + type: SubmitObj + params: + refTaskId: register-queue + canExist: true + params: + name: test + parentQueue: default diff --git a/resources/benchmarks/scaling/workflows/config-kueue.yaml b/resources/benchmarks/scaling/workflows/config-kueue.yaml index e24db9e8..56c4cb5f 100644 --- a/resources/benchmarks/scaling/workflows/config-kueue.yaml +++ b/resources/benchmarks/scaling/workflows/config-kueue.yaml @@ -118,7 +118,6 @@ tasks: - "ray.io/rayjob" - "ray.io/raycluster" - "jobset.x-k8s.io/jobset" - - "kubeflow.org/mxjob" - "kubeflow.org/paddlejob" - "kubeflow.org/pytorchjob" - "kubeflow.org/tfjob" diff --git 
a/resources/benchmarks/templates/kai/job.yaml b/resources/benchmarks/templates/kai/job.yaml new file mode 100644 index 00000000..49de7dc4 --- /dev/null +++ b/resources/benchmarks/templates/kai/job.yaml @@ -0,0 +1,45 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{._NAME_}}" + namespace: "default" +spec: + completions: {{.replicas}} + parallelism: {{.replicas}} + template: + metadata: + labels: + runai/queue: "test" + annotations: + pod-complete.stage.kwok.x-k8s.io/delay: {{.ttl}} + pod-complete.stage.kwok.x-k8s.io/jitter-delay: {{.ttl}} + spec: + schedulerName: kai-scheduler + containers: + - name: test + image: busybox + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: 100m + memory: 250M + nvidia.com/gpu: "8" + requests: + cpu: 100m + memory: 250M + nvidia.com/gpu: "8" + restartPolicy: Never diff --git a/resources/benchmarks/templates/kai/queue.yaml b/resources/benchmarks/templates/kai/queue.yaml new file mode 100644 index 00000000..1ce70ceb --- /dev/null +++ b/resources/benchmarks/templates/kai/queue.yaml @@ -0,0 +1,35 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: scheduling.run.ai/v2 +kind: Queue +metadata: + name: "{{.name}}" +spec: + {{- if .parentQueue }} + parentQueue: "{{.parentQueue}}" + {{- end }} + resources: + cpu: + quota: -1 + limit: -1 + overQuotaWeight: 1 + gpu: + quota: -1 + limit: -1 + overQuotaWeight: 1 + memory: + quota: -1 + limit: -1 + overQuotaWeight: 1 diff --git a/resources/templates/kai/job.yaml b/resources/templates/kai/job.yaml new file mode 100644 index 00000000..153ffabf --- /dev/null +++ b/resources/templates/kai/job.yaml @@ -0,0 +1,45 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{._NAME_}}" + namespace: "{{.namespace}}" +spec: + completions: {{.replicas}} + parallelism: {{.replicas}} + template: + metadata: + labels: + runai/queue: "{{.queue}}" + annotations: + pod-complete.stage.kwok.x-k8s.io/delay: {{.ttl}} + pod-complete.stage.kwok.x-k8s.io/jitter-delay: {{.ttl}} + spec: + schedulerName: kai-scheduler + containers: + - name: test + image: {{.image}} + imagePullPolicy: IfNotPresent + resources: + limits: + cpu: "{{.cpu}}" + memory: {{.memory}} + nvidia.com/gpu: "{{.gpu}}" + requests: + cpu: "{{.cpu}}" + memory: {{.memory}} + nvidia.com/gpu: "{{.gpu}}" + restartPolicy: Never diff --git a/resources/templates/kueue/cluster-queue.yaml b/resources/templates/kueue/cluster-queue.yaml index 058d5c8c..53a369fa 100644 --- a/resources/templates/kueue/cluster-queue.yaml +++ b/resources/templates/kueue/cluster-queue.yaml @@ -1,17 +1,3 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - apiVersion: kueue.x-k8s.io/v1beta1 kind: ClusterQueue metadata: diff --git a/resources/templates/kueue/job.yaml b/resources/templates/kueue/job.yaml index 5e53e05f..1e087588 100644 --- a/resources/templates/kueue/job.yaml +++ b/resources/templates/kueue/job.yaml @@ -1,17 +1,3 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - apiVersion: batch/v1 kind: Job metadata: diff --git a/resources/templates/kueue/local-queue.yaml b/resources/templates/kueue/local-queue.yaml index 204ed4b8..c8e9ca1a 100644 --- a/resources/templates/kueue/local-queue.yaml +++ b/resources/templates/kueue/local-queue.yaml @@ -1,17 +1,3 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - apiVersion: kueue.x-k8s.io/v1beta1 kind: LocalQueue metadata: diff --git a/resources/templates/kueue/resource-flavor.yaml b/resources/templates/kueue/resource-flavor.yaml index 530161c6..70b244ea 100644 --- a/resources/templates/kueue/resource-flavor.yaml +++ b/resources/templates/kueue/resource-flavor.yaml @@ -1,17 +1,3 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - apiVersion: kueue.x-k8s.io/v1beta1 kind: ResourceFlavor metadata: diff --git a/resources/workflows/kai/test-job.yaml b/resources/workflows/kai/test-job.yaml new file mode 100644 index 00000000..c98b4d60 --- /dev/null +++ b/resources/workflows/kai/test-job.yaml @@ -0,0 +1,72 @@ +# Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: test-kai-job +description: submit and validate a job with kai scheduler +tasks: +- id: register-queue + type: RegisterObj + params: + template: "resources/templates/kai/queue.yaml" +- id: register-job + type: RegisterObj + params: + template: "resources/templates/kai/job.yaml" + nameFormat: "job{{._ENUM_}}" + podNameFormat: "{{._NAME_}}-[a-z0-9]+" + podCount: "{{.replicas}}" +- id: configure + type: Configure + params: + nodes: + - type: dgxa100.80g + count: 3 + labels: + nvidia.com/gpu.count: "8" + timeout: 1m +- id: default-queue + type: SubmitObj + params: + refTaskId: register-queue + canExist: true + params: + name: default +- id: test-queue + type: SubmitObj + params: + refTaskId: register-queue + canExist: true + params: + name: test + parentQueue: default +- id: job + type: SubmitObj + params: + refTaskId: register-job + count: 1 + params: + namespace: default + queue: test + replicas: 3 + image: ubuntu + cpu: 100m + memory: 250M + gpu: 8 + ttl: "20s" +- id: status + type: CheckPod + params: + refTaskId: job + status: Running + timeout: 10s diff --git a/resources/workflows/kai/test-mpijob.yaml b/resources/workflows/kai/test-mpijob.yaml index 80739384..eeff90da 100644 --- a/resources/workflows/kai/test-mpijob.yaml +++ b/resources/workflows/kai/test-mpijob.yaml @@ -13,7 +13,7 @@ # limitations under the License. name: test-kai-mpijob -description: register, deploy and configure run:ai custom resources +description: submit and validate an mpijob with kai scheduler tasks: - id: register-queue type: RegisterObj diff --git a/scripts/benchmarks/gang-scheduling/run-kai.sh b/scripts/benchmarks/gang-scheduling/run-kai.sh new file mode 100755 index 00000000..4423a420 --- /dev/null +++ b/scripts/benchmarks/gang-scheduling/run-kai.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +REPO_HOME=$(readlink -f "$(dirname "$(readlink -f "$0")")/../../../") + +"$REPO_HOME/bin/knavigator" -workflow "$REPO_HOME/resources/benchmarks/gang-scheduling/workflows/{config-nodes.yaml,config-kai.yaml,run-test.yaml}" diff --git a/scripts/benchmarks/scaling/run-kai.sh b/scripts/benchmarks/scaling/run-kai.sh new file mode 100755 index 00000000..27b4bcb5 --- /dev/null +++ b/scripts/benchmarks/scaling/run-kai.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +REPO_HOME=$(readlink -f "$(dirname "$(readlink -f "$0")")/../../../") + +"$REPO_HOME/bin/knavigator" -workflow "$REPO_HOME/resources/benchmarks/scaling/workflows/{config-nodes.yaml,config-kai.yaml,run-test-single.yaml}" diff --git a/scripts/env.sh b/scripts/env.sh index 3fec10ce..c631f956 100644 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -69,12 +69,10 @@ function wait_for_pods() { # KWOK # - -KWOK_REPO=kubernetes-sigs/kwok -KWOK_RELEASE="v0.6.1" - function deploy_kwok() { printGreen Deploying KWOK + KWOK_REPO=kubernetes-sigs/kwok + KWOK_RELEASE="v0.6.1" # Deploy KWOK controller kubectl apply -f https://github.com/${KWOK_REPO}/releases/download/${KWOK_RELEASE}/kwok.yaml @@ -88,11 +86,9 @@ function deploy_kwok() { # Prometheus # - -PROMETHEUS_STACK_VERSION=61.5.0 - function deploy_prometheus() { printGreen Deploying Prometheus + PROMETHEUS_STACK_VERSION=61.5.0 helm repo add --force-update prometheus-community https://prometheus-community.github.io/helm-charts @@ -121,10 +117,9 @@ function deploy_prometheus() { # # https://github.com/kubernetes-sigs/jobset -JOBSET_VERSION=v0.7.0 - function deploy_jobset() { printGreen Deploying jobset + JOBSET_VERSION=v0.8.1 kubectl apply --server-side -f https://github.com/kubernetes-sigs/jobset/releases/download/${JOBSET_VERSION}/manifests.yaml @@ -137,10 +132,9 @@ function deploy_jobset() { } # https://github.com/kubernetes-sigs/kueue -KUEUE_VERSION=v0.9.0 - function deploy_kueue() { printGreen Deploying kueue + KUEUE_VERSION=v0.11.4 kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml @@ -153,10 +147,9 @@ function deploy_kueue() { } # https://github.com/volcano-sh/volcano -VOLCANO_VERSION=v1.10.0 - function deploy_volcano() { printGreen Deploying volcano + VOLCANO_VERSION=v1.11.2 helm repo add --force-update volcano-sh https://volcano-sh.github.io/helm-charts @@ -174,10 +167,9 @@ function deploy_volcano() { } # https://github.com/apache/yunikorn-core 
-YUNIKORN_VERSION=v1.6.0 - function deploy_yunikorn() { printGreen Deploying yunikorn + YUNIKORN_VERSION=v1.6.2 helm repo add --force-update yunikorn https://apache.github.io/yunikorn-release @@ -189,12 +181,11 @@ function deploy_yunikorn() { } # https://www.run.ai/ -TRAINING_OPERATOR_VERSION=v1.8.0 -MPI_OPERATOR_VERSION=v0.4.0 -RUNAI_VERSION=2.18.49 - function deploy_runai() { printGreen Deploying run:ai + TRAINING_OPERATOR_VERSION=v1.8.0 + MPI_OPERATOR_VERSION=v0.4.0 + RUNAI_VERSION=2.18.49 if [[ -z "$RUNAI_CONTROL_PLANE_URL" ]] || [[ -z "$RUNAI_CLIENT_SECRET" ]] || [[ -z "$RUNAI_CLUSTER_ID" ]]; then printRed " @@ -232,29 +223,21 @@ Run:ai deployment requires environment variables: } # https://github.com/NVIDIA/KAI-Scheduler/ -TRAINING_OPERATOR_VERSION=v1.8.0 -MPI_OPERATOR_VERSION=v0.4.0 function deploy_kai() { printGreen Deploying kai + MPI_OPERATOR_VERSION=v0.6.0 + KAI_VERSION=v0.4.7 - kubectl apply -k "github.com/kubeflow/training-operator/manifests/overlays/standalone?ref=$TRAINING_OPERATOR_VERSION" + kubectl apply --server-side -f https://raw.githubusercontent.com/kubeflow/mpi-operator/$MPI_OPERATOR_VERSION/deploy/v2beta1/mpi-operator.yaml - kubectl patch deployment training-operator -n kubeflow --type='json' \ - -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args", "value": ["--enable-scheme=tfjob", "--enable-scheme=pytorchjob", "--enable-scheme=xgboostjob"]}]' - - kubectl delete crd mpijobs.kubeflow.org - - kubectl apply -f https://raw.githubusercontent.com/kubeflow/mpi-operator/$MPI_OPERATOR_VERSION/deploy/v2beta1/mpi-operator.yaml - - helm repo add --force-update nvidia-k8s https://helm.ngc.nvidia.com/nvidia/k8s - helm repo update - helm upgrade --install kai-scheduler nvidia-k8s/kai-scheduler -n kai-scheduler \ - --create-namespace --wait --set "global.registry=nvcr.io/nvidia/k8s" + helm upgrade --install kai-scheduler oci://ghcr.io/nvidia/kai-scheduler/kai-scheduler -n kai-scheduler \ + --version="$KAI_VERSION" --create-namespace --wait } 
-SCHEDULER_PLUGINS_VERSION=v0.29.7 + function deploy_scheduler_plugins() { printGreen Deploying scheduler-plugins + SCHEDULER_PLUGINS_VERSION=v0.29.7 helm upgrade --install --repo https://scheduler-plugins.sigs.k8s.io scheduler-plugins scheduler-plugins \ -n scheduler-plugins --create-namespace --version $SCHEDULER_PLUGINS_VERSION \