# nimcache.yaml
---
# NIMCache custom resource (apps.nvidia.com/v1alpha1) for the
# meta-llama-3-2-1b-instruct entry in the nim-service namespace.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  labels:
    app.kubernetes.io/name: k8s-nim-operator
  name: meta-llama-3-2-1b-instruct
  namespace: nim-service
spec:
  source:
    ngc:
      # Container image used as the model puller.
      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8
      # Both secrets are referenced by name only; presumably they must
      # already exist in the nim-service namespace - verify before applying.
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        # Profile ID is quoted so it is always read as a string.
        profiles:
          - "4f904d571fe60ff24695b5ee2aa42da58cb460787a968f1e8a09f5a7e862728d"
        lora: false
        precision: "bf16"
        engine: "vllm"
        qosProfile: "throughput"
        # Optional GPU selection, left disabled.
        # gpus:
        #   - product: "A100"
        #     ids:
        #       - "20b2"
        # Kept quoted so the value stays a string rather than an integer.
        tensorParallelism: "1"
  resources:
    cpu: 500m
    memory: 20Gi
  storage:
    pvc:
      # create: true asks for the PVC to be created with the settings below.
      create: true
      storageClass: "gp3-csi"
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
  # Tolerate the NoSchedule taint with key p4-gpu (any value).
  tolerations:
    - effect: NoSchedule
      key: p4-gpu
      operator: Exists
  # Restrict scheduling to nodes labelled with this instance type.
  nodeSelector:
    node.kubernetes.io/instance-type: p4d.24xlarge