NVIDIA · shivamerla · Aug 18, 2025 · Aug 15, 2025
@@ -5,7 +5,7 @@ metadata:
 spec:
   source:
     ngc:
-      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.3.3
+      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3
       pullSecret: ngc-secret
       authSecret: ngc-api-secret
       model:

@@ -5,7 +5,7 @@ metadata:
 spec:
   image:
     repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
-    tag: "1.8"
+    tag: "1.8.3"
     pullPolicy: IfNotPresent
     pullSecrets:
       - ngc-secret

@@ -0,0 +1,19 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: meta-llama3-8b-instruct
+spec:
+  source:
+    ngc:
+      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3  # pinned to the full 1.8.3 tag
+      pullSecret: ngc-secret
+      authSecret: ngc-api-secret
+      model:
+        engine: tensorrt_llm
+        tensorParallelism: "1"  # quoted: supplied as a string, not an integer
+  storage:
+    pvc:
+      create: true
+      storageClass: ""  # NOTE(review): empty string; presumably selects the cluster default class - confirm
+      size: 50Gi
+      volumeAccessMode: ReadWriteOnce
@@ -0,0 +1,25 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: meta-llama3-8b-instruct
+spec:
+  image:
+    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
+    tag: "1.8.3"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: meta-llama3-8b-instruct  # presumably the NIMCache object of the same name - confirm
+      profile: ""  # empty string: no profile is named here
+  replicas: 1
+  draResources:
+    - claimSpec:
+        devices:
+          - name: gpu  # only the request name is set; every other device field is omitted
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
@@ -0,0 +1,19 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: meta-llama3-8b-instruct
+spec:
+  source:
+    ngc:
+      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3  # pinned to the full 1.8.3 tag
+      pullSecret: ngc-secret
+      authSecret: ngc-api-secret
+      model:
+        engine: tensorrt_llm
+        tensorParallelism: "1"  # quoted: supplied as a string, not an integer
+  storage:
+    pvc:
+      create: true
+      storageClass: ""  # NOTE(review): empty string; presumably selects the cluster default class - confirm
+      size: 50Gi
+      volumeAccessMode: ReadWriteOnce
@@ -0,0 +1,45 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: meta-llama3-8b-instruct
+spec:
+  image:
+    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
+    tag: "1.8.3"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: meta-llama3-8b-instruct  # presumably the NIMCache object of the same name - confirm
+      profile: ""  # empty string: no profile is named here
+  replicas: 1
+  draResources:
+    - claimSpec:
+        isTemplate: true  # NOTE(review): presumably a claim template rather than one fixed claim - confirm
+        devices:
+          - name: gpu
+            deviceClassName: gpu.nvidia.com
+            driverName: gpu.nvidia.com
+            matchAttributes:  # three attribute constraints follow
+              - key: index
+                op: NotEqual
+                value:
+                  intValue: 0
+              - key: driverVersion
+                op: GreaterThanOrEqual
+                value:
+                  versionValue: "550.127.8"
+              - key: architecture
+                op: Equal
+                value:
+                  stringValue: Ampere
+            matchCapacity:  # one capacity constraint: memory Equal 40Gi
+              - key: memory
+                op: Equal
+                value: 40Gi
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
@@ -0,0 +1,19 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMCache
+metadata:
+  name: meta-llama3-8b-instruct
+spec:
+  source:
+    ngc:
+      modelPuller: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.8.3  # pinned to the full 1.8.3 tag
+      pullSecret: ngc-secret
+      authSecret: ngc-api-secret
+      model:
+        engine: tensorrt_llm
+        tensorParallelism: "1"  # quoted: supplied as a string, not an integer
+  storage:
+    pvc:
+      create: true
+      storageClass: ""  # NOTE(review): empty string; presumably selects the cluster default class - confirm
+      size: 50Gi
+      volumeAccessMode: ReadWriteOnce
@@ -0,0 +1,23 @@
+apiVersion: apps.nvidia.com/v1alpha1
+kind: NIMService
+metadata:
+  name: meta-llama3-8b-instruct
+spec:
+  image:
+    repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
+    tag: "1.8.3"
+    pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
+  authSecret: ngc-api-secret
+  storage:
+    nimCache:
+      name: meta-llama3-8b-instruct  # presumably the NIMCache object of the same name - confirm
+      profile: ""  # empty string: no profile is named here
+  replicas: 1
+  draResources:
+    - resourceClaimTemplateName: gpu-resourceclaimtemplate  # refers to a ResourceClaimTemplate by name
+  expose:
+    service:
+      type: ClusterIP
+      port: 8000
@@ -0,0 +1,13 @@
+apiVersion: resource.k8s.io/v1beta2
+kind: ResourceClaimTemplate
+metadata:
+  name: gpu-resourceclaimtemplate
+spec:
+  spec:  # nested spec under the template's own spec
+    devices:
+      requests:
+        - name: gpu
+          exactly:
+            allocationMode: ExactCount
+            count: 1  # a single device of the class below
+            deviceClassName: gpu.nvidia.com