File tree Expand file tree Collapse file tree 8 files changed +162
-11
lines changed
Expand file tree Collapse file tree 8 files changed +162
-11
---
# Kustomization for the facebook/opt-125m CPU sample.
# Combines the local model.yaml with the shared ../rbac/ base, sets the
# namespace to `llm`, and prefixes resource names with
# `facebook-opt-125m-cpu-`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

metadata:
  name: facebook-opt-125m-cpu-single-node-no-scheduler-cpu

namespace: llm

namePrefix: facebook-opt-125m-cpu-

resources:
  - model.yaml
  - ../rbac/

patches:
  # JSON6902 patch: add a resourceNames list to the first rule of the
  # `model-user` Role.
  # NOTE(review): the value is presumably the prefixed name of the
  # LLMInferenceService declared in model.yaml - confirm they stay in sync.
  - patch: |-
      - op: add
        path: /rules/0/resourceNames
        value: ["facebook-opt-125m-cpu-single-node-no-scheduler-cpu"]
    target:
      kind: Role
      name: model-user
  # JSON6902 patch: point the RoleBinding's roleRef at the Role name with
  # this file's namePrefix prepended (`facebook-opt-125m-cpu-` + `model-user`).
  - patch: |-
      - op: replace
        path: /roleRef/name
        value: facebook-opt-125m-cpu-model-user
    target:
      kind: RoleBinding
      name: model-user-tier-binding
---
# LLMInferenceService serving facebook/opt-125m from Hugging Face with a
# single replica, using a CPU build of vLLM as the `main` container.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: single-node-no-scheduler-cpu
spec:
  model:
    uri: hf://facebook/opt-125m
    name: facebook/opt-125m
  replicas: 1
  router:
    # Empty route object; no route fields are customised in this file.
    route: {}
  template:
    containers:
      - name: main
        # NOTE(review): floating `latest` tag - consider pinning a version or
        # digest so deployments are reproducible.
        image: quay.io/pierdipi/vllm-cpu:latest
        env:
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
        resources:
          # Quantities are quoted strings and must not contain whitespace.
          limits:
            cpu: '1'
            memory: 10Gi
          requests:
            cpu: '100m'
            memory: 8Gi
        # Only timing fields are set here; no probe handler (httpGet, exec,
        # ...) is declared in this file.
        # NOTE(review): presumably the handler is supplied by a default
        # elsewhere - confirm.
        livenessProbe:
          initialDelaySeconds: 30
          periodSeconds: 30
          timeoutSeconds: 30
          failureThreshold: 5
---
# Kustomization for the Qwen3 NVIDIA GPU sample.
# Combines the local model.yaml with the shared ../rbac/ base, sets the
# namespace to `llm`, and prefixes resource names with `qwen3-`.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

metadata:
  name: qwen3-single-node-no-scheduler-nvidia-gpu

namespace: llm

namePrefix: qwen3-

resources:
  - model.yaml
  - ../rbac/

patches:
  # JSON6902 patch: add a resourceNames list to the first rule of the
  # `model-user` Role.
  # NOTE(review): the value is presumably the prefixed name of the
  # LLMInferenceService declared in model.yaml - confirm they stay in sync.
  - patch: |-
      - op: add
        path: /rules/0/resourceNames
        value: ["qwen3-single-node-no-scheduler-nvidia-gpu"]
    target:
      kind: Role
      name: model-user
  # JSON6902 patch: point the RoleBinding's roleRef at the Role name with
  # this file's namePrefix prepended (`qwen3-` + `model-user`).
  - patch: |-
      - op: replace
        path: /roleRef/name
        value: qwen3-model-user
    target:
      kind: RoleBinding
      name: model-user-tier-binding
---
# LLMInferenceService serving Qwen/Qwen3-0.6B from Hugging Face with a single
# replica, constrained to nodes labelled as having an NVIDIA GPU.
# No container image is set in this file.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: single-node-no-scheduler-nvidia-gpu
spec:
  model:
    uri: hf://Qwen/Qwen3-0.6B
    name: Qwen/Qwen3-0.6B
  replicas: 1
  router:
    # Empty route object; no route fields are customised in this file.
    route: {}
  template:
    # The label value is the string "true"; it is quoted so YAML does not
    # turn it into a boolean, and it must match the node label exactly
    # (no surrounding whitespace).
    nodeSelector:
      nvidia.com/gpu.present: "true"
    # Tolerate any NoSchedule taint whose key is nvidia.com/gpu.
    tolerations:
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists
    containers:
      - name: main
        resources:
          # The GPU request and limit are both "1".
          limits:
            cpu: "4"
            memory: 8Gi
            nvidia.com/gpu: "1"
          requests:
            cpu: "1"
            memory: 4Gi
            nvidia.com/gpu: "1"
        # HTTPS GET on /health, port 8000; the first check runs after 120s.
        livenessProbe:
          httpGet:
            path: /health
            port: 8000
            scheme: HTTPS
          initialDelaySeconds: 120
          periodSeconds: 30
          timeoutSeconds: 30
          failureThreshold: 5
Original file line number Diff line number Diff line change 22apiVersion : rbac.authorization.k8s.io/v1
33kind : Role
44metadata :
5- name : models-user
6- namespace : llm
5+ name : model-user
76rules :
87 - apiGroups : ["serving.kserve.io"]
98 resources : ["llminferenceservices"]
1211apiVersion : rbac.authorization.k8s.io/v1
1312kind : RoleBinding
1413metadata :
15- name : model-users-tier-binding
16- namespace : llm
14+ name : model-user-tier-binding
1715subjects :
1816 - kind : Group
1917 name : system:serviceaccounts:openshift-ai-inference-tier-free
@@ -26,5 +24,5 @@ subjects:
2624 apiGroup : rbac.authorization.k8s.io
2725roleRef :
2826 kind : Role
29- name : models -user
27+ name : model -user
3028 apiGroup : rbac.authorization.k8s.io
---
# Kustomization base that exposes all-tiers.yaml in the `llm` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

metadata:
  name: maas-tiers-rbac

namespace: llm

resources:
  - all-tiers.yaml
Original file line number Diff line number Diff line change @@ -2,11 +2,28 @@ apiVersion: kustomize.config.k8s.io/v1beta1
22kind : Kustomization
33
44metadata :
5- name : vllm-simulator
5+ name : facebook-opt-125m-simulated
66
77namespace : llm
88
9+ namePrefix : facebook-opt-125m-
10+
911resources :
10- - simulated- model.yaml
11- - rbac.yaml
12+ - model.yaml
13+ - ../rbac/
1214
15+ patches :
16+ - patch : |-
17+ - op: add
18+ path: /rules/0/resourceNames
19+ value: ["facebook-opt-125m-simulated"]
20+ target:
21+ kind: Role
22+ name: model-user
23+ - patch : |-
24+ - op: replace
25+ path: /roleRef/name
26+ value: facebook-opt-125m-model-user
27+ target:
28+ kind: RoleBinding
29+ name: model-user-tier-binding
Original file line number Diff line number Diff line change 11apiVersion : serving.kserve.io/v1alpha1
22kind : LLMInferenceService
33metadata :
4- name : facebook-opt-125m-single- simulated
4+ name : simulated
55spec :
66 model :
77 uri : hf://facebook/opt-125m
1919 - --port
2020 - " 8000"
2121 - --model
22- - facebook-opt-125m-single- simulated
22+ - facebook-opt-125m-simulated
2323 - --mode
2424 - random
2525 - --ssl-certfile
5151 path : /ready
5252 port : https
5353 scheme : HTTPS
54-
You can’t perform that action at this time.
0 commit comments