diff --git a/config/samples/nim/llm/nimcache-llm.yaml b/config/samples/nim/llm/basic/nimcache-llm.yaml similarity index 85% rename from config/samples/nim/llm/nimcache-llm.yaml rename to config/samples/nim/llm/basic/nimcache-llm.yaml index 820641ae5..7fb38d952 100644 --- a/config/samples/nim/llm/nimcache-llm.yaml +++ b/config/samples/nim/llm/basic/nimcache-llm.yaml @@ -5,7 +5,7 @@ metadata: spec: source: ngc: - modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.3.3 + modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3 pullSecret: ngc-secret authSecret: ngc-api-secret model: diff --git a/config/samples/nim/llm/nimservice.yaml b/config/samples/nim/llm/basic/nimservice.yaml similarity index 96% rename from config/samples/nim/llm/nimservice.yaml rename to config/samples/nim/llm/basic/nimservice.yaml index 08f2c9678..ec74bf120 100644 --- a/config/samples/nim/llm/nimservice.yaml +++ b/config/samples/nim/llm/basic/nimservice.yaml @@ -5,7 +5,7 @@ metadata: spec: image: repository: nvcr.io/nim/meta/llama-3.1-8b-instruct - tag: "1.8" + tag: "1.8.3" pullPolicy: IfNotPresent pullSecrets: - ngc-secret diff --git a/config/samples/nim/llm/dra-auto-creation/example0/nimcache-llm.yaml b/config/samples/nim/llm/dra-auto-creation/example0/nimcache-llm.yaml new file mode 100644 index 000000000..7fb38d952 --- /dev/null +++ b/config/samples/nim/llm/dra-auto-creation/example0/nimcache-llm.yaml @@ -0,0 +1,19 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMCache +metadata: + name: meta-llama3-8b-instruct +spec: + source: + ngc: + modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3 + pullSecret: ngc-secret + authSecret: ngc-api-secret + model: + engine: tensorrt_llm + tensorParallelism: "1" + storage: + pvc: + create: true + storageClass: "" + size: "50Gi" + volumeAccessMode: ReadWriteOnce diff --git a/config/samples/nim/llm/dra-auto-creation/example0/nimservice.yaml b/config/samples/nim/llm/dra-auto-creation/example0/nimservice.yaml new file mode 100644 index 000000000..234ffb69e --- /dev/null +++ b/config/samples/nim/llm/dra-auto-creation/example0/nimservice.yaml @@ -0,0 +1,25 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMService +metadata: + name: meta-llama3-8b-instruct +spec: + image: + repository: nvcr.io/nim/meta/llama-3.1-8b-instruct + tag: "1.8.3" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + authSecret: ngc-api-secret + storage: + nimCache: + name: meta-llama3-8b-instruct + profile: '' + replicas: 1 + draResources: + - claimSpec: + devices: + - name: gpu + expose: + service: + type: ClusterIP + port: 8000 diff --git a/config/samples/nim/llm/dra-auto-creation/example1/nimcache-llm.yaml b/config/samples/nim/llm/dra-auto-creation/example1/nimcache-llm.yaml new file mode 100644 index 000000000..7fb38d952 --- /dev/null +++ b/config/samples/nim/llm/dra-auto-creation/example1/nimcache-llm.yaml @@ -0,0 +1,19 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMCache +metadata: + name: meta-llama3-8b-instruct +spec: + source: + ngc: + modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3 + pullSecret: ngc-secret + authSecret: ngc-api-secret + model: + engine: tensorrt_llm + tensorParallelism: "1" + storage: + pvc: + create: true + storageClass: "" + size: "50Gi" + volumeAccessMode: ReadWriteOnce diff --git a/config/samples/nim/llm/dra-auto-creation/example1/nimservice.yaml b/config/samples/nim/llm/dra-auto-creation/example1/nimservice.yaml new file mode 100644 index 000000000..698d9bd38 --- /dev/null +++ b/config/samples/nim/llm/dra-auto-creation/example1/nimservice.yaml @@ -0,0 +1,45 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMService +metadata: + name: meta-llama3-8b-instruct +spec: + image: + repository: nvcr.io/nim/meta/llama-3.1-8b-instruct + tag: "1.8.3" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + authSecret: ngc-api-secret + storage: + nimCache: + name: meta-llama3-8b-instruct + profile: '' + replicas: 1 + draResources: + - claimSpec: + isTemplate: true + devices: + - name: gpu + deviceClassName: gpu.nvidia.com + driverName: gpu.nvidia.com + matchAttributes: + - key: index + op: NotEqual + value: + intValue: 0 + - key: driverVersion + op: GreaterThanOrEqual + value: + versionValue: "550.127.8" + - key: architecture + op: Equal + value: + stringValue: Ampere + matchCapacity: + - key: memory + op: Equal + value: 40Gi + expose: + service: + type: ClusterIP + port: 8000 diff --git a/config/samples/nim/llm/dra/nimcache-llm.yaml b/config/samples/nim/llm/dra/nimcache-llm.yaml new file mode 100644 index 000000000..7fb38d952 --- /dev/null +++ b/config/samples/nim/llm/dra/nimcache-llm.yaml @@ -0,0 +1,19 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMCache +metadata: + name: meta-llama3-8b-instruct +spec: + source: + ngc: + modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3 + pullSecret: ngc-secret + authSecret: ngc-api-secret + model: + engine: tensorrt_llm + tensorParallelism: "1" + storage: + pvc: + create: true + storageClass: "" + size: "50Gi" + volumeAccessMode: ReadWriteOnce diff --git a/config/samples/nim/llm/dra/nimservice.yaml b/config/samples/nim/llm/dra/nimservice.yaml new file mode 100644 index 000000000..382ebeea0 --- /dev/null +++ b/config/samples/nim/llm/dra/nimservice.yaml @@ -0,0 +1,23 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMService +metadata: + name: meta-llama3-8b-instruct +spec: + image: + repository: nvcr.io/nim/meta/llama-3.1-8b-instruct + tag: "1.8.3" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + authSecret: ngc-api-secret + storage: + nimCache: + name: meta-llama3-8b-instruct + profile: '' + replicas: 1 + draResources: + - resourceClaimTemplateName: gpu-resourceclaimtemplate + expose: + service: + type: ClusterIP + port: 8000 diff --git a/config/samples/nim/llm/dra/resourceclaimtemplate.yaml b/config/samples/nim/llm/dra/resourceclaimtemplate.yaml new file mode 100644 index 000000000..e147a2cb4 --- /dev/null +++ b/config/samples/nim/llm/dra/resourceclaimtemplate.yaml @@ -0,0 +1,13 @@ +apiVersion: resource.k8s.io/v1beta2 +kind: ResourceClaimTemplate +metadata: + name: gpu-resourceclaimtemplate +spec: + spec: + devices: + requests: + - exactly: + allocationMode: ExactCount + count: 1 + deviceClassName: gpu.nvidia.com + name: gpu