feat: additional sample models (#96)

bartoszmajsak · web-flow · commit c67005df7503 · 2025-09-29T17:34:51.000+02:00
This PR brings two additional sample models as LLMInferenceServices: - A small [facebook/opt-125m](https://huggingface.co/facebook/opt-125m) model for the CPU runtime - [Qwen3-0.6B](https://huggingface.co/Qwen/Qwen3-0.6B), based on the existing sample from the `deployments` folder. RBAC for models has been reworked to allow per-model granularity. Signed-off-by: Bartosz Majsak <bartosz.majsak@gmail.com>
diff --git a/maas-api/deploy/models/facebook-opt-125m-cpu/kustomization.yaml b/maas-api/deploy/models/facebook-opt-125m-cpu/kustomization.yaml
@@ -0,0 +1,36 @@
+# Kustomize overlay for the facebook/opt-125m CPU sample model.
+# Bundles the LLMInferenceService from model.yaml with the shared RBAC base
+# in ../rbac/ and narrows that RBAC to this one model.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+metadata:
+  name: facebook-opt-125m-cpu-single-node-no-scheduler-cpu
+
+namespace: llm
+
+# Prefix added to every resource name in this overlay; the patch values
+# below are written with the prefix already applied.
+namePrefix: facebook-opt-125m-cpu-
+
+resources:
+  - model.yaml
+  - ../rbac/
+
+patches:
+  # Restrict the Role's first rule to this model's prefixed name.
+  - patch: |-
+      - op: add
+        path: /rules/0/resourceNames
+        value: ["facebook-opt-125m-cpu-single-node-no-scheduler-cpu"]
+    target:
+      kind: Role
+      name: model-user
+  # Point the RoleBinding at the prefixed Role name.
+  - patch: |-
+      - op: replace
+        path: /roleRef/name
+        value: facebook-opt-125m-cpu-model-user
+    target:
+      kind: RoleBinding
+      name: model-user-tier-binding
diff --git a/maas-api/deploy/models/facebook-opt-125m-cpu/model.yaml b/maas-api/deploy/models/facebook-opt-125m-cpu/model.yaml
@@ -0,0 +1,34 @@
+# Sample LLMInferenceService serving facebook/opt-125m with the vllm-cpu image.
+apiVersion: serving.kserve.io/v1alpha1
+kind: LLMInferenceService
+metadata:
+  name: single-node-no-scheduler-cpu
+spec:
+  model:
+    name: facebook/opt-125m
+    uri: hf://facebook/opt-125m
+  replicas: 1
+  router:
+    route: {}
+  template:
+    containers:
+      - name: main
+        # NOTE(review): `latest` is a mutable tag; consider pinning a version
+        # or digest so the sample stays reproducible.
+        image: quay.io/pierdipi/vllm-cpu:latest
+        env:
+          - name: VLLM_LOGGING_LEVEL
+            value: DEBUG
+        resources:
+          requests:
+            cpu: "100m"
+            memory: 8Gi
+          limits:
+            cpu: "1"
+            memory: 10Gi
+        # Only probe timings are set; no handler (httpGet/exec) is declared here.
+        livenessProbe:
+          initialDelaySeconds: 30
+          periodSeconds: 30
+          timeoutSeconds: 30
+          failureThreshold: 5
diff --git a/maas-api/deploy/models/qwen3-0.6B/kustomization.yaml b/maas-api/deploy/models/qwen3-0.6B/kustomization.yaml
@@ -0,0 +1,35 @@
+# Kustomize overlay for the Qwen3-0.6B NVIDIA GPU sample model.
+# Bundles the LLMInferenceService from model.yaml with the shared RBAC base
+# in ../rbac/ and narrows that RBAC to this one model.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+metadata:
+  name: qwen3-single-node-no-scheduler-nvidia-gpu
+
+namespace: llm
+
+# Prefix added to every resource name in this overlay.
+namePrefix: qwen3-
+
+resources:
+  - model.yaml
+  - ../rbac/
+
+patches:
+  # Restrict the Role's first rule to this model's prefixed name.
+  - patch: |-
+      - op: add
+        path: /rules/0/resourceNames
+        value: ["qwen3-single-node-no-scheduler-nvidia-gpu"]
+    target:
+      kind: Role
+      name: model-user
+  # Point the RoleBinding at the prefixed Role name.
+  - patch: |-
+      - op: replace
+        path: /roleRef/name
+        value: qwen3-model-user
+    target:
+      kind: RoleBinding
+      name: model-user-tier-binding
diff --git a/maas-api/deploy/models/qwen3-0.6B/model.yaml b/maas-api/deploy/models/qwen3-0.6B/model.yaml
@@ -0,0 +1,42 @@
+# Sample LLMInferenceService serving Qwen/Qwen3-0.6B on one NVIDIA GPU.
+apiVersion: serving.kserve.io/v1alpha1
+kind: LLMInferenceService
+metadata:
+  name: single-node-no-scheduler-nvidia-gpu
+spec:
+  model:
+    name: Qwen/Qwen3-0.6B
+    uri: hf://Qwen/Qwen3-0.6B
+  replicas: 1
+  router:
+    route: {}
+  template:
+    # Select nodes labelled nvidia.com/gpu.present and tolerate the
+    # nvidia.com/gpu NoSchedule taint.
+    nodeSelector:
+      nvidia.com/gpu.present: "true"
+    tolerations:
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+    containers:
+      # No image is declared for this container in this file.
+      - name: main
+        resources:
+          requests:
+            cpu: "1"
+            memory: 4Gi
+            nvidia.com/gpu: "1"
+          limits:
+            cpu: "4"
+            memory: 8Gi
+            nvidia.com/gpu: "1"
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: 8000
+            scheme: HTTPS
+          initialDelaySeconds: 120
+          periodSeconds: 30
+          timeoutSeconds: 30
+          failureThreshold: 5
diff --git a/maas-api/deploy/models/rbac/all-tiers.yaml b/maas-api/deploy/models/rbac/all-tiers.yaml
@@ -2,8 +2,7 @@
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
-  name: models-user
-  namespace: llm
+  name: model-user  # each model overlay's namePrefix is prepended to this name
 rules:
   - apiGroups: ["serving.kserve.io"]
     resources: ["llminferenceservices"]
@@ -12,8 +11,7 @@ rules:
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
-  name: model-users-tier-binding
-  namespace: llm
+  name: model-user-tier-binding  # likewise prefixed by each model overlay
 subjects:
   - kind: Group
     name: system:serviceaccounts:openshift-ai-inference-tier-free
@@ -26,5 +24,5 @@ subjects:
     apiGroup: rbac.authorization.k8s.io
 roleRef:
   kind: Role
-  name: models-user
+  name: model-user  # model overlays replace this with their prefixed Role name
   apiGroup: rbac.authorization.k8s.io
diff --git a/maas-api/deploy/models/rbac/kustomization.yaml b/maas-api/deploy/models/rbac/kustomization.yaml
@@ -0,0 +1,13 @@
+# Shared RBAC base (the model-user Role and its tier RoleBinding) that the
+# model overlays pull in through "../rbac/".
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+metadata:
+  name: maas-tiers-rbac
+
+# Namespace applied to the resources listed below.
+namespace: llm
+
+resources:
+  - all-tiers.yaml
diff --git a/maas-api/deploy/models/simulator/kustomization.yaml b/maas-api/deploy/models/simulator/kustomization.yaml
@@ -2,11 +2,32 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 metadata:
-  name: vllm-simulator
+  name: facebook-opt-125m-simulated
 
 namespace: llm
 
+# Prefix added to every resource name in this overlay, so model.yaml's
+# "simulated" becomes "facebook-opt-125m-simulated".
+namePrefix: facebook-opt-125m-
+
 resources:
-- simulated-model.yaml
-- rbac.yaml
+  - model.yaml
+  - ../rbac/
 
+patches:
+  # Restrict the Role's first rule to this model's prefixed name.
+  - patch: |-
+      - op: add
+        path: /rules/0/resourceNames
+        value: ["facebook-opt-125m-simulated"]
+    target:
+      kind: Role
+      name: model-user
+  # Point the RoleBinding at the prefixed Role name.
+  - patch: |-
+      - op: replace
+        path: /roleRef/name
+        value: facebook-opt-125m-model-user
+    target:
+      kind: RoleBinding
+      name: model-user-tier-binding
diff --git a/maas-api/deploy/models/simulator/model.yaml b/maas-api/deploy/models/simulator/model.yaml
@@ -1,7 +1,7 @@
 apiVersion: serving.kserve.io/v1alpha1
 kind: LLMInferenceService
 metadata:
-  name: facebook-opt-125m-single-simulated
+  name: simulated  # kustomization.yaml's namePrefix makes this facebook-opt-125m-simulated
 spec:
   model:
     uri: hf://facebook/opt-125m
@@ -19,7 +19,7 @@ spec:
         - --port
         - "8000" 
         - --model
-        - facebook-opt-125m-single-simulated
+        - facebook-opt-125m-simulated  # same string as namePrefix + metadata.name
         - --mode
         - random
         - --ssl-certfile
@@ -51,4 +51,3 @@ spec:
             path: /ready
             port: https
             scheme: HTTPS
-