Skip to content

Commit 3884502

Browse files
varunrsekar authored and shivamerla committed
add nimservice samples with DRA
Signed-off-by: Varun Ramachandra Sekar <vsekar@nvidia.com>
1 parent dee5311 commit 3884502

File tree

9 files changed: 165 additions and 2 deletions

config/samples/nim/llm/nimcache-llm.yaml renamed to config/samples/nim/llm/basic/nimcache-llm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ metadata:
55
spec:
66
source:
77
ngc:
8-
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.3.3
8+
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
99
pullSecret: ngc-secret
1010
authSecret: ngc-api-secret
1111
model:

config/samples/nim/llm/nimservice.yaml renamed to config/samples/nim/llm/basic/nimservice.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ metadata:
55
spec:
66
image:
77
repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
8-
tag: "1.8"
8+
tag: "1.8.3"
99
pullPolicy: IfNotPresent
1010
pullSecrets:
1111
- ngc-secret
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: apps.nvidia.com/v1alpha1
2+
kind: NIMCache
3+
metadata:
4+
name: meta-llama3-8b-instruct
5+
spec:
6+
source:
7+
ngc:
8+
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
9+
pullSecret: ngc-secret
10+
authSecret: ngc-api-secret
11+
model:
12+
engine: tensorrt_llm
13+
tensorParallelism: "1"
14+
storage:
15+
pvc:
16+
create: true
17+
storageClass: ""
18+
size: "50Gi"
19+
volumeAccessMode: ReadWriteOnce
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
apiVersion: apps.nvidia.com/v1alpha1
2+
kind: NIMService
3+
metadata:
4+
name: meta-llama3-8b-instruct
5+
spec:
6+
image:
7+
repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
8+
tag: "1.8.3"
9+
pullPolicy: IfNotPresent
10+
pullSecrets:
11+
- ngc-secret
12+
authSecret: ngc-api-secret
13+
storage:
14+
nimCache:
15+
name: meta-llama3-8b-instruct
16+
profile: ''
17+
replicas: 1
18+
draResources:
19+
- claimSpec:
20+
devices:
21+
- name: gpu
22+
expose:
23+
service:
24+
type: ClusterIP
25+
port: 8000
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: apps.nvidia.com/v1alpha1
2+
kind: NIMCache
3+
metadata:
4+
name: meta-llama3-8b-instruct
5+
spec:
6+
source:
7+
ngc:
8+
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
9+
pullSecret: ngc-secret
10+
authSecret: ngc-api-secret
11+
model:
12+
engine: tensorrt_llm
13+
tensorParallelism: "1"
14+
storage:
15+
pvc:
16+
create: true
17+
storageClass: ""
18+
size: "50Gi"
19+
volumeAccessMode: ReadWriteOnce
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
apiVersion: apps.nvidia.com/v1alpha1
2+
kind: NIMService
3+
metadata:
4+
name: meta-llama3-8b-instruct
5+
spec:
6+
image:
7+
repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
8+
tag: "1.8.3"
9+
pullPolicy: IfNotPresent
10+
pullSecrets:
11+
- ngc-secret
12+
authSecret: ngc-api-secret
13+
storage:
14+
nimCache:
15+
name: meta-llama3-8b-instruct
16+
profile: ''
17+
replicas: 1
18+
draResources:
19+
- claimSpec:
20+
isTemplate: true
21+
devices:
22+
- name: gpu
23+
deviceClassName: gpu.nvidia.com
24+
driverName: gpu.nvidia.com
25+
matchAttributes:
26+
- key: index
27+
op: NotEqual
28+
value:
29+
intValue: 0
30+
- key: driverVersion
31+
op: GreaterThanOrEqual
32+
value:
33+
versionValue: "550.127.8"
34+
- key: architecture
35+
op: Equal
36+
value:
37+
stringValue: Ampere
38+
matchCapacity:
39+
- key: memory
40+
op: Equal
41+
value: 40Gi
42+
expose:
43+
service:
44+
type: ClusterIP
45+
port: 8000
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: apps.nvidia.com/v1alpha1
2+
kind: NIMCache
3+
metadata:
4+
name: meta-llama3-8b-instruct
5+
spec:
6+
source:
7+
ngc:
8+
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
9+
pullSecret: ngc-secret
10+
authSecret: ngc-api-secret
11+
model:
12+
engine: tensorrt_llm
13+
tensorParallelism: "1"
14+
storage:
15+
pvc:
16+
create: true
17+
storageClass: ""
18+
size: "50Gi"
19+
volumeAccessMode: ReadWriteOnce
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
apiVersion: apps.nvidia.com/v1alpha1
2+
kind: NIMService
3+
metadata:
4+
name: meta-llama3-8b-instruct
5+
spec:
6+
image:
7+
repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
8+
tag: "1.8.3"
9+
pullPolicy: IfNotPresent
10+
pullSecrets:
11+
- ngc-secret
12+
authSecret: ngc-api-secret
13+
storage:
14+
nimCache:
15+
name: meta-llama3-8b-instruct
16+
profile: ''
17+
replicas: 1
18+
draResources:
19+
- resourceClaimTemplateName: gpu-resourceclaimtemplate
20+
expose:
21+
service:
22+
type: ClusterIP
23+
port: 8000
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
apiVersion: resource.k8s.io/v1beta2
2+
kind: ResourceClaimTemplate
3+
metadata:
4+
name: gpu-resourceclaimtemplate
5+
spec:
6+
spec:
7+
devices:
8+
requests:
9+
- exactly:
10+
allocationMode: ExactCount
11+
count: 1
12+
deviceClassName: gpu.nvidia.com
13+
name: gpu

0 commit comments

Comments
 (0)