# nimservice.yaml
---
# NIMService custom resource named meta-llama-3-2-1b-instruct in the
# nim-service namespace. It references the llama-3.2-1b-instruct NIM image,
# a NIMCache entry for storage, and pins scheduling to p4d.24xlarge nodes.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMService
metadata:
  name: meta-llama-3-2-1b-instruct
  namespace: nim-service
spec:
  # Container image and the secret(s) used to pull it.
  image:
    repository: nvcr.io/nim/meta/llama-3.2-1b-instruct
    # Quoted so the tag stays the string "1.8" rather than a float.
    tag: "1.8"
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  # Name of the secret used for authentication.
  # NOTE(review): presumably holds the NGC API key - confirm against the
  # operator documentation.
  authSecret: ngc-api-secret
  # Storage is backed by a NIMCache entry; the name here matches
  # metadata.name of this service.
  storage:
    nimCache:
      name: meta-llama-3-2-1b-instruct
      # Quoted so the hex profile identifier is always read as a string.
      profile: '4f904d571fe60ff24695b5ee2aa42da58cb460787a968f1e8a09f5a7e862728d'
  replicas: 1
  # Requests and limits are set to identical values for GPU, CPU and memory.
  resources:
    limits:
      nvidia.com/gpu: 1
      cpu: "12"
      memory: 32Gi
    requests:
      nvidia.com/gpu: 1
      cpu: "12"
      memory: 32Gi
  # Exposed inside the cluster only (ClusterIP) on port 8000, the same port
  # the probes below target.
  expose:
    service:
      type: ClusterIP
      port: 8000
  # Tolerate any NoSchedule taint whose key is p4-gpu (operator: Exists
  # matches regardless of the taint's value).
  tolerations:
    - effect: NoSchedule
      key: p4-gpu
      operator: Exists
  nodeSelector:
    node.kubernetes.io/instance-type: p4d.24xlarge
  # All three probes are enabled and use the same HTTP GET on /v1/models:8000
  # with identical timings.
  # NOTE(review): timeoutSeconds (300) is much larger than periodSeconds (10),
  # and no failureThreshold is set on any probe - confirm this is intentional.
  livenessProbe:
    enabled: true
    probe:
      httpGet:
        path: /v1/models
        port: 8000
      initialDelaySeconds: 120
      timeoutSeconds: 300
      periodSeconds: 10
  readinessProbe:
    enabled: true
    probe:
      httpGet:
        path: /v1/models
        port: 8000
      initialDelaySeconds: 120
      timeoutSeconds: 300
      periodSeconds: 10
  startupProbe:
    enabled: true
    probe:
      httpGet:
        path: /v1/models
        port: 8000
      initialDelaySeconds: 120
      timeoutSeconds: 300
      periodSeconds: 10