Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
spec:
source:
ngc:
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.3.3
modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
pullSecret: ngc-secret
authSecret: ngc-api-secret
model:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
spec:
image:
repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
tag: "1.8"
tag: "1.8.3"
pullPolicy: IfNotPresent
pullSecrets:
- ngc-secret
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Sample NIMCache resource named meta-llama3-8b-instruct.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm against the NIMCache CRD schema before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-8b-instruct
spec:
  source:
    ngc:
      # Image reference is pinned to the full patch version (1.8.3).
      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: tensorrt_llm
        # Quoted on purpose: the value is a string, not an integer.
        tensorParallelism: "1"
  storage:
    pvc:
      create: true
      # Explicit empty string; presumably falls back to the cluster's
      # default storage class -- TODO confirm.
      storageClass: ""
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Sample NIMService resource named meta-llama3-8b-instruct that requests
# a device through an inline DRA claimSpec.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm against the NIMService CRD schema before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMService
metadata:
  name: meta-llama3-8b-instruct
spec:
  image:
    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
    # Quoted so the tag stays a string.
    tag: "1.8.3"
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api-secret
  storage:
    nimCache:
      # Same name as the NIMCache sample's metadata.name.
      name: meta-llama3-8b-instruct
      profile: ''
  replicas: 1
  draResources:
    - claimSpec:
        devices:
          # Only a request name is given; every other device field is
          # left unset here.
          - name: gpu
  expose:
    service:
      type: ClusterIP
      port: 8000
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Sample NIMCache resource named meta-llama3-8b-instruct.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm against the NIMCache CRD schema before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-8b-instruct
spec:
  source:
    ngc:
      # Image reference is pinned to the full patch version (1.8.3).
      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: tensorrt_llm
        # Quoted on purpose: the value is a string, not an integer.
        tensorParallelism: "1"
  storage:
    pvc:
      create: true
      # Explicit empty string; presumably falls back to the cluster's
      # default storage class -- TODO confirm.
      storageClass: ""
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Sample NIMService resource named meta-llama3-8b-instruct that requests
# a device through an inline, templated DRA claimSpec with attribute and
# capacity selectors.
# NOTE(review): nesting was reconstructed from a flattened diff view; the
# selector layout in particular is inferred from key order -- confirm
# against the NIMService CRD schema before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMService
metadata:
  name: meta-llama3-8b-instruct
spec:
  image:
    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
    # Quoted so the tag stays a string.
    tag: "1.8.3"
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api-secret
  storage:
    nimCache:
      # Same name as the NIMCache sample's metadata.name.
      name: meta-llama3-8b-instruct
      profile: ''
  replicas: 1
  draResources:
    - claimSpec:
        isTemplate: true
        devices:
          - name: gpu
            deviceClassName: gpu.nvidia.com
            driverName: gpu.nvidia.com
            # Each selector is a key / op / value triple; the attribute
            # value is wrapped in a typed field (intValue, versionValue,
            # stringValue).
            matchAttributes:
              - key: index
                op: NotEqual
                value:
                  intValue: 0
              - key: driverVersion
                op: GreaterThanOrEqual
                value:
                  # Quoted so the version stays a string.
                  versionValue: "550.127.8"
              - key: architecture
                op: Equal
                value:
                  stringValue: Ampere
            # Unlike matchAttributes, the capacity value is a plain
            # scalar rather than a typed wrapper.
            matchCapacity:
              - key: memory
                op: Equal
                value: 40Gi
  expose:
    service:
      type: ClusterIP
      port: 8000
19 changes: 19 additions & 0 deletions config/samples/nim/llm/dra/nimcache-llm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Sample NIMCache resource named meta-llama3-8b-instruct.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm against the NIMCache CRD schema before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-8b-instruct
spec:
  source:
    ngc:
      # Image reference is pinned to the full patch version (1.8.3).
      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: tensorrt_llm
        # Quoted on purpose: the value is a string, not an integer.
        tensorParallelism: "1"
  storage:
    pvc:
      create: true
      # Explicit empty string; presumably falls back to the cluster's
      # default storage class -- TODO confirm.
      storageClass: ""
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
23 changes: 23 additions & 0 deletions config/samples/nim/llm/dra/nimservice.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Sample NIMService resource named meta-llama3-8b-instruct that obtains
# its device through a named ResourceClaimTemplate instead of an inline
# claim spec.
# NOTE(review): nesting was reconstructed from a flattened diff view;
# confirm against the NIMService CRD schema before applying.
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMService
metadata:
  name: meta-llama3-8b-instruct
spec:
  image:
    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
    # Quoted so the tag stays a string.
    tag: "1.8.3"
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api-secret
  storage:
    nimCache:
      # Same name as the NIMCache sample's metadata.name.
      name: meta-llama3-8b-instruct
      profile: ''
  replicas: 1
  draResources:
    # Same name as the ResourceClaimTemplate sample's metadata.name
    # (gpu-resourceclaimtemplate).
    - resourceClaimTemplateName: gpu-resourceclaimtemplate
  expose:
    service:
      type: ClusterIP
      port: 8000
13 changes: 13 additions & 0 deletions config/samples/nim/llm/dra/resourceclaimtemplate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# ResourceClaimTemplate named gpu-resourceclaimtemplate: a single request
# called "gpu" for exactly one device of class gpu.nvidia.com.
# NOTE(review): nesting was reconstructed from a flattened diff view.
apiVersion: resource.k8s.io/v1beta2
kind: ResourceClaimTemplate
metadata:
  name: gpu-resourceclaimtemplate
spec:
  # The outer spec belongs to the template; the inner spec is the claim
  # spec it carries.
  spec:
    devices:
      requests:
        # "exactly" and "name" are sibling keys of the same request entry.
        - exactly:
            allocationMode: ExactCount
            count: 1
            deviceClassName: gpu.nvidia.com
          name: gpu
Loading