Skip to content

Commit c67005d

Browse files
feat: additional sample models (#96)
This PR brings two additional sample models as LLMInferenceServices:

- Small [facebook/opt-125m](https://huggingface.co/facebook/opt-125m) model for the CPU runtime
- [Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B), based on the existing sample from the `deployments` folder

RBAC for models has been reworked to allow per-model granularity.

Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
1 parent b2f7bb3 commit c67005d

File tree

8 files changed

+162
-11
lines changed

8 files changed

+162
-11
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
---
# Kustomize overlay for the facebook/opt-125m CPU sample model: bundles the
# model manifest with the shared RBAC base and narrows that RBAC to this model.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

metadata:
  name: facebook-opt-125m-cpu-single-node-no-scheduler-cpu

namespace: llm

# Prefix applied to the names of the resources listed below.
namePrefix: facebook-opt-125m-cpu-

resources:
  - model.yaml
  - ../rbac/

patches:
  # Restrict the first Role rule to a single named resource. The value is the
  # full, already-prefixed name; presumably namePrefix + metadata.name from
  # model.yaml -- keep the two in sync.
  - target:
      kind: Role
      name: model-user
    patch: |-
      - op: add
        path: /rules/0/resourceNames
        value: ["facebook-opt-125m-cpu-single-node-no-scheduler-cpu"]
  # Point the tier RoleBinding at the Role under its prefixed name.
  - target:
      kind: RoleBinding
      name: model-user-tier-binding
    patch: |-
      - op: replace
        path: /roleRef/name
        value: facebook-opt-125m-cpu-model-user
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
---
# Sample LLMInferenceService serving facebook/opt-125m from a vLLM CPU image,
# with a single replica.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: single-node-no-scheduler-cpu
spec:
  model:
    name: facebook/opt-125m
    uri: hf://facebook/opt-125m
  replicas: 1
  router:
    # Empty route object; no route fields are overridden here.
    route: {}
  template:
    containers:
      - name: main
        # NOTE(review): ':latest' is a mutable tag -- consider pinning a
        # version or digest for reproducible deployments.
        image: quay.io/pierdipi/vllm-cpu:latest
        env:
          - name: VLLM_LOGGING_LEVEL
            value: DEBUG
        resources:
          requests:
            cpu: "100m"
            memory: 8Gi
          limits:
            cpu: "1"
            memory: 10Gi
        # Only probe timings are set; no probe handler is declared in this
        # manifest (presumably supplied elsewhere -- TODO confirm).
        livenessProbe:
          initialDelaySeconds: 30
          periodSeconds: 30
          timeoutSeconds: 30
          failureThreshold: 5
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
---
# Kustomize overlay for the Qwen3 NVIDIA GPU sample model: bundles the model
# manifest with the shared RBAC base and narrows that RBAC to this model.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

metadata:
  name: qwen3-single-node-no-scheduler-nvidia-gpu

namespace: llm

# Prefix applied to the names of the resources listed below.
namePrefix: qwen3-

resources:
  - model.yaml
  - ../rbac/

patches:
  # Restrict the first Role rule to a single named resource. The value is the
  # full, already-prefixed name; presumably namePrefix + metadata.name from
  # model.yaml -- keep the two in sync.
  - target:
      kind: Role
      name: model-user
    patch: |-
      - op: add
        path: /rules/0/resourceNames
        value: ["qwen3-single-node-no-scheduler-nvidia-gpu"]
  # Point the tier RoleBinding at the Role under its prefixed name.
  - target:
      kind: RoleBinding
      name: model-user-tier-binding
    patch: |-
      - op: replace
        path: /roleRef/name
        value: qwen3-model-user
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
---
# Sample LLMInferenceService serving Qwen/Qwen3-0.6B on a node with an NVIDIA
# GPU, with a single replica.
apiVersion: serving.kserve.io/v1alpha1
kind: LLMInferenceService
metadata:
  name: single-node-no-scheduler-nvidia-gpu
spec:
  model:
    name: Qwen/Qwen3-0.6B
    uri: hf://Qwen/Qwen3-0.6B
  replicas: 1
  router:
    # Empty route object; no route fields are overridden here.
    route: {}
  template:
    # Schedule only onto nodes labelled as having a GPU. The label value is
    # quoted so it stays a string rather than a boolean.
    nodeSelector:
      nvidia.com/gpu.present: "true"
    # Tolerate the NoSchedule taint keyed nvidia.com/gpu.
    tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
    containers:
      - name: main
        resources:
          requests:
            cpu: "1"
            memory: 4Gi
            nvidia.com/gpu: "1"
          limits:
            cpu: "4"
            memory: 8Gi
            nvidia.com/gpu: "1"
        # HTTPS health check on port 8000; the first probe is delayed by
        # 120 seconds.
        livenessProbe:
          httpGet:
            scheme: HTTPS
            port: 8000
            path: /health
          initialDelaySeconds: 120
          periodSeconds: 30
          timeoutSeconds: 30
          failureThreshold: 5

maas-api/deploy/models/simulator/rbac.yaml renamed to maas-api/deploy/models/rbac/all-tiers.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
apiVersion: rbac.authorization.k8s.io/v1
33
kind: Role
44
metadata:
5-
name: models-user
6-
namespace: llm
5+
name: model-user
76
rules:
87
- apiGroups: ["serving.kserve.io"]
98
resources: ["llminferenceservices"]
@@ -12,8 +11,7 @@ rules:
1211
apiVersion: rbac.authorization.k8s.io/v1
1312
kind: RoleBinding
1413
metadata:
15-
name: model-users-tier-binding
16-
namespace: llm
14+
name: model-user-tier-binding
1715
subjects:
1816
- kind: Group
1917
name: system:serviceaccounts:openshift-ai-inference-tier-free
@@ -26,5 +24,5 @@ subjects:
2624
apiGroup: rbac.authorization.k8s.io
2725
roleRef:
2826
kind: Role
29-
name: models-user
27+
name: model-user
3028
apiGroup: rbac.authorization.k8s.io
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
---
# Kustomize base exposing the tier RBAC manifest (all-tiers.yaml) in the
# 'llm' namespace so that other kustomizations can include it as a resource.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

metadata:
  name: maas-tiers-rbac

# Namespace set on the resources listed below.
namespace: llm

resources:
  - all-tiers.yaml

maas-api/deploy/models/simulator/kustomization.yaml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,28 @@ apiVersion: kustomize.config.k8s.io/v1beta1
22
kind: Kustomization
33

44
metadata:
5-
name: vllm-simulator
5+
name: facebook-opt-125m-simulated
66

77
namespace: llm
88

9+
namePrefix: facebook-opt-125m-
10+
911
resources:
10-
- simulated-model.yaml
11-
- rbac.yaml
12+
- model.yaml
13+
- ../rbac/
1214

15+
patches:
16+
- patch: |-
17+
- op: add
18+
path: /rules/0/resourceNames
19+
value: ["facebook-opt-125m-simulated"]
20+
target:
21+
kind: Role
22+
name: model-user
23+
- patch: |-
24+
- op: replace
25+
path: /roleRef/name
26+
value: facebook-opt-125m-model-user
27+
target:
28+
kind: RoleBinding
29+
name: model-user-tier-binding

maas-api/deploy/models/simulator/simulated-model.yaml renamed to maas-api/deploy/models/simulator/model.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: serving.kserve.io/v1alpha1
22
kind: LLMInferenceService
33
metadata:
4-
name: facebook-opt-125m-single-simulated
4+
name: simulated
55
spec:
66
model:
77
uri: hf://facebook/opt-125m
@@ -19,7 +19,7 @@ spec:
1919
- --port
2020
- "8000"
2121
- --model
22-
- facebook-opt-125m-single-simulated
22+
- facebook-opt-125m-simulated
2323
- --mode
2424
- random
2525
- --ssl-certfile
@@ -51,4 +51,3 @@ spec:
5151
path: /ready
5252
port: https
5353
scheme: HTTPS
54-

0 commit comments

Comments
 (0)