Skip to content

Commit fbd2e28

Browse files
authored
[26.03] (helm) More nemotron rebranding (#1581)
1 parent d38abb2 commit fbd2e28

17 files changed

+166
-166
lines changed

ci/scripts/validate_deployment_configs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,9 @@ def __str__(self) -> str:
4949
"page-elements": "page_elements",
5050
"graphic-elements": "graphic_elements",
5151
"table-structure": "table_structure",
52-
"ocr": "nemoretriever_ocr_v1",
52+
"ocr": "ocr",
5353
"embedding": "embedqa",
54-
"reranker": "llama_3_2_nv_rerankqa_1b_v2",
54+
"reranker": "rerankqa",
5555
"nemotron-parse": "nemotron_parse",
5656
"vlm": "nemotron_nano_12b_v2_vl",
5757
"audio": "audio",

helm/README.md

Lines changed: 41 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn
298298
| envVars.AUDIO_GRPC_ENDPOINT | string | `"audio:50051"` | |
299299
| envVars.AUDIO_INFER_PROTOCOL | string | `"grpc"` | |
300300
| envVars.COMPONENTS_TO_READY_CHECK | string | `"ALL"` | |
301-
| envVars.EMBEDDING_NIM_ENDPOINT | string | `"http://llama-32-nv-embedqa-1b-v2:8000/v1"` | |
301+
| envVars.EMBEDDING_NIM_ENDPOINT | string | `"http://llama-nemotron-embed-1b-v2:8000/v1"` | |
302302
| envVars.EMBEDDING_NIM_MODEL_NAME | string | `"nvidia/llama-nemotron-embed-1b-v2"` | |
303303
| envVars.IMAGE_STORAGE_PUBLIC_BASE_URL | string | `""` | |
304304
| envVars.IMAGE_STORAGE_URI | string | `"s3://nv-ingest/artifacts/store/images"` | |
@@ -465,46 +465,46 @@ You can also use NV-Ingest's Python client API to interact with the service runn
465465
| nimOperator.graphic_elements.storage.pvc.create | bool | `true` | |
466466
| nimOperator.graphic_elements.storage.pvc.size | string | `"25Gi"` | |
467467
| nimOperator.graphic_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | |
468-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.authSecret | string | `"ngc-api"` | |
469-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.enabled | bool | `false` | |
470-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.env[0].name | string | `"NIM_HTTP_API_PORT"` | |
471-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.env[0].value | string | `"8000"` | |
472-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.env[1].name | string | `"NIM_TRITON_LOG_VERBOSE"` | |
473-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.env[1].value | string | `"1"` | |
474-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.expose.service.grpcPort | int | `8001` | |
475-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.expose.service.port | int | `8000` | |
476-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.expose.service.type | string | `"ClusterIP"` | |
477-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.pullPolicy | string | `"IfNotPresent"` | |
478-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.pullSecrets[0] | string | `"ngc-secret"` | |
479-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.repository | string | `"nvcr.io/nim/nvidia/llama-nemotron-rerank-1b-v2"` | |
480-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.tag | string | `"1.10.0"` | |
481-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.replicas | int | `1` | |
482-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.resources.limits."nvidia.com/gpu" | int | `1` | |
483-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.create | bool | `true` | |
484-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.size | string | `"50Gi"` | |
485-
| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | |
486-
| nimOperator.nemoretriever_ocr_v1.authSecret | string | `"ngc-api"` | |
487-
| nimOperator.nemoretriever_ocr_v1.enabled | bool | `true` | |
488-
| nimOperator.nemoretriever_ocr_v1.env[0].name | string | `"OMP_NUM_THREADS"` | |
489-
| nimOperator.nemoretriever_ocr_v1.env[0].value | string | `"8"` | |
490-
| nimOperator.nemoretriever_ocr_v1.env[1].name | string | `"NIM_HTTP_API_PORT"` | |
491-
| nimOperator.nemoretriever_ocr_v1.env[1].value | string | `"8000"` | |
492-
| nimOperator.nemoretriever_ocr_v1.env[2].name | string | `"NIM_TRITON_LOG_VERBOSE"` | |
493-
| nimOperator.nemoretriever_ocr_v1.env[2].value | string | `"1"` | |
494-
| nimOperator.nemoretriever_ocr_v1.env[3].name | string | `"NIM_TRITON_MAX_BATCH_SIZE"` | |
495-
| nimOperator.nemoretriever_ocr_v1.env[3].value | string | `"32"` | |
496-
| nimOperator.nemoretriever_ocr_v1.expose.service.grpcPort | int | `8001` | |
497-
| nimOperator.nemoretriever_ocr_v1.expose.service.port | int | `8000` | |
498-
| nimOperator.nemoretriever_ocr_v1.expose.service.type | string | `"ClusterIP"` | |
499-
| nimOperator.nemoretriever_ocr_v1.image.pullPolicy | string | `"IfNotPresent"` | |
500-
| nimOperator.nemoretriever_ocr_v1.image.pullSecrets[0] | string | `"ngc-secret"` | |
501-
| nimOperator.nemoretriever_ocr_v1.image.repository | string | `"nvcr.io/nim/nvidia/nemotron-ocr-v1"` | |
502-
| nimOperator.nemoretriever_ocr_v1.image.tag | string | `"1.3.0"` | |
503-
| nimOperator.nemoretriever_ocr_v1.replicas | int | `1` | |
504-
| nimOperator.nemoretriever_ocr_v1.resources.limits."nvidia.com/gpu" | int | `1` | |
505-
| nimOperator.nemoretriever_ocr_v1.storage.pvc.create | bool | `true` | |
506-
| nimOperator.nemoretriever_ocr_v1.storage.pvc.size | string | `"25Gi"` | |
507-
| nimOperator.nemoretriever_ocr_v1.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | |
468+
| nimOperator.rerankqa.authSecret | string | `"ngc-api"` | |
469+
| nimOperator.rerankqa.enabled | bool | `false` | |
470+
| nimOperator.rerankqa.env[0].name | string | `"NIM_HTTP_API_PORT"` | |
471+
| nimOperator.rerankqa.env[0].value | string | `"8000"` | |
472+
| nimOperator.rerankqa.env[1].name | string | `"NIM_TRITON_LOG_VERBOSE"` | |
473+
| nimOperator.rerankqa.env[1].value | string | `"1"` | |
474+
| nimOperator.rerankqa.expose.service.grpcPort | int | `8001` | |
475+
| nimOperator.rerankqa.expose.service.port | int | `8000` | |
476+
| nimOperator.rerankqa.expose.service.type | string | `"ClusterIP"` | |
477+
| nimOperator.rerankqa.image.pullPolicy | string | `"IfNotPresent"` | |
478+
| nimOperator.rerankqa.image.pullSecrets[0] | string | `"ngc-secret"` | |
479+
| nimOperator.rerankqa.image.repository | string | `"nvcr.io/nim/nvidia/llama-nemotron-rerank-1b-v2"` | |
480+
| nimOperator.rerankqa.image.tag | string | `"1.10.0"` | |
481+
| nimOperator.rerankqa.replicas | int | `1` | |
482+
| nimOperator.rerankqa.resources.limits."nvidia.com/gpu" | int | `1` | |
483+
| nimOperator.rerankqa.storage.pvc.create | bool | `true` | |
484+
| nimOperator.rerankqa.storage.pvc.size | string | `"50Gi"` | |
485+
| nimOperator.rerankqa.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | |
486+
| nimOperator.ocr.authSecret | string | `"ngc-api"` | |
487+
| nimOperator.ocr.enabled | bool | `true` | |
488+
| nimOperator.ocr.env[0].name | string | `"OMP_NUM_THREADS"` | |
489+
| nimOperator.ocr.env[0].value | string | `"8"` | |
490+
| nimOperator.ocr.env[1].name | string | `"NIM_HTTP_API_PORT"` | |
491+
| nimOperator.ocr.env[1].value | string | `"8000"` | |
492+
| nimOperator.ocr.env[2].name | string | `"NIM_TRITON_LOG_VERBOSE"` | |
493+
| nimOperator.ocr.env[2].value | string | `"1"` | |
494+
| nimOperator.ocr.env[3].name | string | `"NIM_TRITON_MAX_BATCH_SIZE"` | |
495+
| nimOperator.ocr.env[3].value | string | `"32"` | |
496+
| nimOperator.ocr.expose.service.grpcPort | int | `8001` | |
497+
| nimOperator.ocr.expose.service.port | int | `8000` | |
498+
| nimOperator.ocr.expose.service.type | string | `"ClusterIP"` | |
499+
| nimOperator.ocr.image.pullPolicy | string | `"IfNotPresent"` | |
500+
| nimOperator.ocr.image.pullSecrets[0] | string | `"ngc-secret"` | |
501+
| nimOperator.ocr.image.repository | string | `"nvcr.io/nim/nvidia/nemotron-ocr-v1"` | |
502+
| nimOperator.ocr.image.tag | string | `"1.3.0"` | |
503+
| nimOperator.ocr.replicas | int | `1` | |
504+
| nimOperator.ocr.resources.limits."nvidia.com/gpu" | int | `1` | |
505+
| nimOperator.ocr.storage.pvc.create | bool | `true` | |
506+
| nimOperator.ocr.storage.pvc.size | string | `"25Gi"` | |
507+
| nimOperator.ocr.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | |
508508
| nimOperator.nemotron_nano_12b_v2_vl.authSecret | string | `"ngc-api"` | |
509509
| nimOperator.nemotron_nano_12b_v2_vl.enabled | bool | `false` | |
510510
| nimOperator.nemotron_nano_12b_v2_vl.env[0].name | string | `"NIM_HTTP_API_PORT"` | |

helm/mig/nv-ingest-mig-values-25x.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ nemotron-table-structure-v1:
3838
nvidia.com/gpu: 0
3939
nvidia.com/mig-1g.10gb: 1
4040

41-
nvidia-nim-llama-32-nv-embedqa-1b-v2:
41+
nvidia-nim-llama-nemotron-embed-1b-v2:
4242
resources:
4343
limits:
4444
nvidia.com/gpu: 0
@@ -75,8 +75,8 @@ text-embedding-nim:
7575
nvidia.com/gpu: 0
7676
nvidia.com/mig-1g.10gb: 1
7777

78-
# If you want to deploy llama-32-nv-rerankqa-1b-v2
79-
llama-32-nv-rerankqa-1b-v2:
78+
# If you want to deploy llama-nemotron-rerank-1b-v2
79+
llama-nemotron-rerank-1b-v2:
8080
resources:
8181
limits:
8282
nvidia.com/gpu: 0

helm/mig/nv-ingest-mig-values.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ nimOperator:
3939
nvidia.com/gpu: "0"
4040
nvidia.com/mig-1g.10gb: 1
4141

42-
nemoretriever_ocr_v1:
42+
ocr:
4343
resources:
4444
limits:
4545
nvidia.com/gpu: "0"
@@ -48,8 +48,8 @@ nimOperator:
4848
nvidia.com/gpu: "0"
4949
nvidia.com/mig-1g.20gb: 1
5050

51-
# If you want to deploy llama-32-nv-rerankqa-1b-v2
52-
llama_3_2_nv_rerankqa_1b_v2:
51+
# If you want to deploy llama-nemotron-rerank-1b-v2
52+
rerankqa:
5353
enabled: true
5454
resources:
5555
limits:

helm/overrides/values-a100-40gb.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ nimOperator:
6464
- name: OMP_NUM_THREADS
6565
value: "1"
6666

67-
nemoretriever_ocr_v1:
67+
ocr:
6868
env:
6969
- name: OMP_NUM_THREADS
7070
value: "8"
@@ -75,7 +75,7 @@ nimOperator:
7575
- name: NIM_TRITON_MAX_BATCH_SIZE
7676
value: "1"
7777

78-
llama_3_2_nv_rerankqa_1b_v2:
78+
rerankqa:
7979
env:
8080
- name: NIM_HTTP_API_PORT
8181
value: "8000"

helm/overrides/values-a10g.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ nimOperator:
7070
- name: OMP_NUM_THREADS
7171
value: "1"
7272

73-
nemoretriever_ocr_v1:
73+
ocr:
7474
env:
7575
- name: OMP_NUM_THREADS
7676
value: "8"
@@ -81,7 +81,7 @@ nimOperator:
8181
- name: NIM_TRITON_MAX_BATCH_SIZE
8282
value: "1"
8383

84-
llama_3_2_nv_rerankqa_1b_v2:
84+
rerankqa:
8585
env:
8686
- name: NIM_HTTP_API_PORT
8787
value: "8000"

helm/overrides/values-l40s.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ nimOperator:
6464
- name: OMP_NUM_THREADS
6565
value: "1"
6666

67-
nemoretriever_ocr_v1:
67+
ocr:
6868
env:
6969
- name: OMP_NUM_THREADS
7070
value: "8"
@@ -75,7 +75,7 @@ nimOperator:
7575
- name: NIM_TRITON_MAX_BATCH_SIZE
7676
value: "1"
7777

78-
llama_3_2_nv_rerankqa_1b_v2:
78+
rerankqa:
7979
env:
8080
- name: NIM_HTTP_API_PORT
8181
value: "8000"

helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml

Lines changed: 0 additions & 47 deletions
This file was deleted.

helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml renamed to helm/templates/llama-nemotron-embed-1b-v2.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ spec:
2121
apiVersion: apps.nvidia.com/v1alpha1
2222
kind: NIMService
2323
metadata:
24-
name: llama-32-nv-embedqa-1b-v2
24+
name: llama-nemotron-embed-1b-v2
2525
spec:
2626
image:
2727
repository: {{ .Values.nimOperator.embedqa.image.repository }}
Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
{{ if and (.Capabilities.APIVersions.Has "apps.nvidia.com/v1alpha1") (eq .Values.nimOperator.rerankqa.enabled true) -}}
2+
apiVersion: apps.nvidia.com/v1alpha1
3+
kind: NIMCache
4+
metadata:
5+
name: llama-nemotron-rerank-1b-v2
6+
annotations:
7+
helm.sh/resource-policy: keep
8+
spec:
9+
source:
10+
ngc:
11+
modelPuller: "{{ .Values.nimOperator.rerankqa.image.repository }}:{{ .Values.nimOperator.rerankqa.image.tag }}"
12+
pullSecret: "{{ index .Values.nimOperator.rerankqa.image.pullSecrets 0 }}"
13+
authSecret: {{ .Values.nimOperator.rerankqa.authSecret }}
14+
storage:
15+
pvc:
16+
create: {{ .Values.nimOperator.rerankqa.storage.pvc.create }}
17+
storageClass: {{ .Values.nimOperator.rerankqa.storage.pvc.storageClass }}
18+
size: {{ .Values.nimOperator.rerankqa.storage.pvc.size }}
19+
volumeAccessMode: {{ .Values.nimOperator.rerankqa.storage.pvc.volumeAccessMode }}
20+
---
21+
apiVersion: apps.nvidia.com/v1alpha1
22+
kind: NIMService
23+
metadata:
24+
name: llama-nemotron-rerank-1b-v2
25+
spec:
26+
image:
27+
repository: {{ .Values.nimOperator.rerankqa.image.repository }}
28+
tag: {{ .Values.nimOperator.rerankqa.image.tag }}
29+
pullPolicy: {{ .Values.nimOperator.rerankqa.image.pullPolicy }}
30+
pullSecrets:
31+
{{ toYaml .Values.nimOperator.rerankqa.image.pullSecrets | nindent 6 }}
32+
authSecret: {{ .Values.nimOperator.rerankqa.authSecret }}
33+
storage:
34+
nimCache:
35+
name: llama-nemotron-rerank-1b-v2
36+
replicas: {{ .Values.nimOperator.rerankqa.replicas }}
37+
nodeSelector:
38+
{{ toYaml .Values.nimOperator.rerankqa.nodeSelector | nindent 4 }}
39+
resources:
40+
{{ toYaml .Values.nimOperator.rerankqa.resources | nindent 4 }}
41+
tolerations:
42+
{{ toYaml .Values.nimOperator.rerankqa.tolerations | nindent 4 }}
43+
expose:
44+
{{ toYaml .Values.nimOperator.rerankqa.expose | nindent 4 }}
45+
env:
46+
{{ toYaml .Values.nimOperator.rerankqa.env | nindent 4 }}
47+
{{- end }}

0 commit comments

Comments
 (0)