Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions config/samples/nim/caching/datastore/nimcache.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# NIM Cache with a model pulled from NeMo DataStore
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-1b-instruct
  namespace: nim-service
spec:
  # Where the model is fetched from.
  source:
    dataStore:
      endpoint: "http://nemodatastore-sample.nemo.svc.cluster.local:8000/v1/hf"
      # default/llama-3-1b-instruct model must be present in NeMo DataStore
      modelName: "llama-3-1b-instruct"
      authSecret: hf-auth
      modelPuller: nvcr.io/nvidia/nemo-microservices/nds-v2-huggingface-cli:25.06
      pullSecret: ngc-secret
  # Where the fetched model is stored.
  storage:
    pvc:
      create: true
      # NOTE(review): empty value; confirm how the operator resolves the
      # StorageClass when this is left blank.
      storageClass: ""
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
21 changes: 21 additions & 0 deletions config/samples/nim/caching/hf/nimcache.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# NIM Cache with Multi-LLM NIM from HF
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: nim-cache-multi-llm
  # Kubernetes namespace of this resource (distinct from the source
  # namespace under spec.source.hf below).
  namespace: nim-service
spec:
  # Where the model is fetched from.
  source:
    hf:
      endpoint: "https://huggingface.co"
      # Namespace on the source endpoint, not a Kubernetes namespace.
      namespace: "meta-llama"
      authSecret: hf-secret  # with HF_TOKEN set
      modelPuller: nvcr.io/nim/nvidia/llm-nim:1.12
      pullSecret: ngc-secret
      modelName: "Llama-3.2-1B-Instruct"
  # Where the fetched model is stored.
  storage:
    pvc:
      create: true
      # NOTE(review): empty value; confirm how the operator resolves the
      # StorageClass when this is left blank.
      storageClass: ''
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
24 changes: 24 additions & 0 deletions config/samples/nim/caching/ngc-mirror/https/nimcache-llm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# LLM NIM Cache with Mirrored Local Model Registry
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-2-1b-instruct
  namespace: nim-service
spec:
  env:
    # Replace <server-name> and <port> with the address of the HTTPS mirror.
    - name: NIM_REPOSITORY_OVERRIDE
      value: "https://<server-name>:<port>/"
  # Where the model is fetched from.
  source:
    ngc:
      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8
      pullSecret: ngc-secret
      authSecret: https-api-secret
      model:
        engine: "tensorrt"
        # Kept as a quoted string, as in the original sample.
        tensorParallelism: "1"
  # Where the fetched model is stored.
  storage:
    pvc:
      create: true
      # NOTE(review): empty value; confirm how the operator resolves the
      # StorageClass when this is left blank.
      storageClass: ''
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
24 changes: 24 additions & 0 deletions config/samples/nim/caching/ngc-mirror/jfrog/nimcache-llm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# LLM NIM Cache with Mirrored Local Model Registry
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-2-1b-instruct
  namespace: nim-service
spec:
  env:
    # Replace <server-name> and <port> with the address of the JFrog mirror.
    - name: NIM_REPOSITORY_OVERRIDE
      value: "jfrog://<server-name>:<port>/"
  # Where the model is fetched from.
  source:
    ngc:
      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8
      pullSecret: ngc-secret
      authSecret: jfrog-api-secret
      model:
        engine: "tensorrt"
        # Kept as a quoted string, as in the original sample.
        tensorParallelism: "1"
  # Where the fetched model is stored.
  storage:
    pvc:
      create: true
      # NOTE(review): empty value; confirm how the operator resolves the
      # StorageClass when this is left blank.
      storageClass: ''
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
28 changes: 28 additions & 0 deletions config/samples/nim/caching/ngc-mirror/s3/nimcache-llm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# LLM NIM Cache with Mirrored Local Model Registry
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  name: meta-llama3-2-1b-instruct
  namespace: nim-service
spec:
  # Sample values: replace the bucket, profile and region with your own.
  env:
    - name: NIM_REPOSITORY_OVERRIDE
      value: "s3://nim_bucket/"
    - name: AWS_PROFILE
      value: "default"
    - name: AWS_REGION
      value: "us-east-1"
  # Where the model is fetched from.
  source:
    ngc:
      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8
      pullSecret: ngc-secret
      authSecret: aws-api-secret
      model:
        engine: "tensorrt"
        # Kept as a quoted string, as in the original sample.
        tensorParallelism: "1"
  # Where the fetched model is stored.
  storage:
    pvc:
      create: true
      # NOTE(review): empty value; confirm how the operator resolves the
      # StorageClass when this is left blank.
      storageClass: ''
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
22 changes: 22 additions & 0 deletions config/samples/nim/caching/ngc/nimcache.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# NIM Cache with LLM-Specific NIM from NGC
apiVersion: apps.nvidia.com/v1alpha1
kind: NIMCache
metadata:
  labels:
    app.kubernetes.io/name: k8s-nim-operator
  name: meta-llama-3-2-1b-instruct
  namespace: nim-service
spec:
  # Where the model is fetched from.
  source:
    ngc:
      modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8
      pullSecret: ngc-secret
      authSecret: ngc-api-secret
      model:
        engine: "tensorrt"
        # Kept as a quoted string, as in the original sample.
        tensorParallelism: "1"
  # Where the fetched model is stored. No storageClass is set in this sample.
  storage:
    pvc:
      create: true
      size: "50Gi"
      volumeAccessMode: ReadWriteOnce
40 changes: 0 additions & 40 deletions config/samples/nim/kserve/serverless/nimcache.yaml

This file was deleted.

64 changes: 0 additions & 64 deletions config/samples/nim/kserve/serverless/nimservice.yaml

This file was deleted.

19 changes: 0 additions & 19 deletions config/samples/nim/llm/basic/nimcache-llm.yaml

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading
Loading