diff --git a/docker-compose.yaml b/docker-compose.yaml index 7a5a43619..12c33b2c0 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -9,7 +9,6 @@ services: - "6379:6379" page-elements: - image: ${YOLOX_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-page-elements-v3}:${YOLOX_TAG:-1.7.0} shm_size: 16gb ports: - "8000:8000" @@ -46,7 +45,6 @@ services: runtime: nvidia graphic-elements: - image: ${YOLOX_GRAPHIC_ELEMENTS_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1}:${YOLOX_GRAPHIC_ELEMENTS_TAG:-1.6.0} shm_size: 16gb ports: - "8003:8000" @@ -71,7 +69,6 @@ services: runtime: nvidia table-structure: - image: ${YOLOX_TABLE_STRUCTURE_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-table-structure-v1}:${YOLOX_TABLE_STRUCTURE_TAG:-1.6.0} shm_size: 16gb ports: - "8006:8000" @@ -96,7 +93,6 @@ services: runtime: nvidia ocr: - image: ${OCR_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-ocr-v1}:${OCR_TAG:-1.2.1} shm_size: 16gb ports: - "8009:8000" @@ -121,7 +117,6 @@ services: embedding: # NIM ON - image: ${EMBEDDING_IMAGE:-nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2}:${EMBEDDING_TAG:-1.10.1} shm_size: 16gb ports: - "8012:8000" @@ -146,7 +141,6 @@ services: reranker: # NIM ON - image: ${RERANKER_IMAGE:-nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2}:${RERANKER_TAG:-1.8.0} shm_size: 16gb ports: - "8020:8000" @@ -167,7 +161,6 @@ services: - reranker nemotron-parse: - image: ${NEMOTRON_PARSE_IMAGE:-nvcr.io/nim/nvidia/nemotron-parse}:${NEMOTRON_PARSE_TAG:-1.5.0} shm_size: 16gb ports: - "8015:8000" @@ -190,7 +183,6 @@ services: - nemotron-parse vlm: - image: ${VLM_IMAGE:-nvcr.io/nim/nvidia/nemotron-nano-12b-v2-vl}:${VLM_TAG:-1.5.0} shm_size: 16gb ports: - "8018:8000" @@ -215,7 +207,6 @@ services: - vlm audio: - image: ${AUDIO_IMAGE:-nvcr.io/nim/nvidia/parakeet-1-1b-ctc-en-us}:${AUDIO_TAG:-1.4.0} shm_size: 2gb ports: - "8021:50051" # grpc diff --git a/docs/docs/extraction/audio.md b/docs/docs/extraction/audio.md index ec2e26370..f1f02132c 100644 --- a/docs/docs/extraction/audio.md 
+++ b/docs/docs/extraction/audio.md @@ -65,7 +65,7 @@ Use the following procedure to run the NIM locally. 3. Start the nv-ingest services with the `audio` profile. This profile includes the necessary components for audio processing. Use the following command. The `--profile audio` flag ensures that speech-specific services are launched. For more information, refer to [Profile Information](quickstart-guide.md#profile-information). ```shell - docker compose --profile retrieval --profile audio up + docker compose -f docker-compose.yaml -f release.yaml --profile retrieval --profile audio up ``` 4. After the services are running, you can interact with nv-ingest by using Python. diff --git a/docs/docs/extraction/nemoretriever-parse.md b/docs/docs/extraction/nemoretriever-parse.md index 1b187f31a..e18c94d0a 100644 --- a/docs/docs/extraction/nemoretriever-parse.md +++ b/docs/docs/extraction/nemoretriever-parse.md @@ -37,7 +37,7 @@ Use the following procedure to run the NIM locally. - The --profile nemotron-parse flag ensures that vision-language retrieval services are launched. For more information, refer to [Profile Information](quickstart-guide.md#profile-information). ```shell - docker compose --profile nemotron-parse up + docker compose -f docker-compose.yaml -f release.yaml --profile nemotron-parse up ``` 2. After the services are running, you can interact with nv-ingest by using Python. 
diff --git a/docs/docs/extraction/nv-ingest-python-api.md b/docs/docs/extraction/nv-ingest-python-api.md index d7c18a26f..903d8cc28 100644 --- a/docs/docs/extraction/nv-ingest-python-api.md +++ b/docs/docs/extraction/nv-ingest-python-api.md @@ -510,7 +510,7 @@ ingestor = ingestor.store( ```bash # Set DATASET_ROOT before starting services export DATASET_ROOT=/raid/my-project/nv-ingest-data -docker compose up -d +docker compose -f docker-compose.yaml -f release.yaml up -d ``` ```python diff --git a/docs/docs/extraction/quickstart-guide.md b/docs/docs/extraction/quickstart-guide.md index 55f1e980a..5269320d2 100644 --- a/docs/docs/extraction/quickstart-guide.md +++ b/docs/docs/extraction/quickstart-guide.md @@ -48,9 +48,14 @@ If you prefer, you can run on Kubernetes by using [our Helm chart](https://githu `sudo nvidia-ctk runtime configure --runtime=docker --set-as-default` -6. Start core services. This example uses the retrieval profile. For more information about other profiles, see [Profile Information](#profile-information). +6. Start core services. This example uses the retrieval profile. For more information about other profiles, see [Profile Information](#profile-information). - `docker compose --profile retrieval up` + ```shell + docker compose \ + -f docker-compose.yaml \ + -f release.yaml \ + --profile retrieval up + ``` !!! tip @@ -61,12 +66,13 @@ If you prefer, you can run on Kubernetes by using [our Helm chart](https://githu The default configuration may not fit on a single GPU for some hardware targets. If you are running on any of the following GPUs, use a `docker compose` override file to reduce VRAM usage: - A100-SXM4-40GB - A10G - Override files typically lower per-service memory allocation, batch sizes, or concurrency. This trades peak throughput for making the full pipeline runnable on the available GPU. To use an override file, include it in your `docker compose up` command by using a second `-f` flag after the base `docker-compose.yaml` file. 
The settings in the second file override the values that are set in the first file. + Override files typically lower per-service memory allocation, batch sizes, or concurrency. This trades peak throughput for making the full pipeline runnable on the available GPU. To use an override file, include it in your `docker compose up` command by adding an additional `-f` flag after `release.yaml`. The settings in later files override the values that are set in earlier files. The following example uses an override file that contains settings that are optimized for an NVIDIA A100 GPU with 40GB of VRAM. ```shell docker compose \ -f docker-compose.yaml \ + -f release.yaml \ -f docker-compose.a100-40gb.yaml \ --profile retrieval up ``` diff --git a/docs/docs/extraction/vlm-embed.md b/docs/docs/extraction/vlm-embed.md index eb7d364af..89a37a864 100644 --- a/docs/docs/extraction/vlm-embed.md +++ b/docs/docs/extraction/vlm-embed.md @@ -17,18 +17,30 @@ The model supports images that contain text, tables, charts, and infographics. Use the following procedure to configure and run the multimodal embedding NIM locally. -1. Set the embedding model in your .env file. This tells NeMo Retriever extraction to use the Llama 3.2 Multimodal model instead of the default text-only embedding model. +1. Set the embedding model name in your `.env` file. This tells NeMo Retriever extraction to use the Llama 3.2 Multimodal model instead of the default text-only embedding model. ``` - EMBEDDING_IMAGE=nvcr.io/nvidia/nemo-microservices/llama-3.2-nemoretriever-1b-vlm-embed-v1 - EMBEDDING_TAG=1.7.0 EMBEDDING_NIM_MODEL_NAME=nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1 ``` -2. Start the NeMo Retriever extraction services. The multimodal embedding service is included by default. +2. Create a Docker Compose override file to swap the embedding NIM image. 
+ + Create `docker-compose.vlm-embed.yaml` with the following contents: + + ```yaml + services: + embedding: + image: nvcr.io/nvidia/nemo-microservices/llama-3.2-nemoretriever-1b-vlm-embed-v1:1.7.0 + ``` + +3. Start the NeMo Retriever extraction services. ``` - docker compose --profile retrieval up + docker compose \ + -f docker-compose.yaml \ + -f release.yaml \ + -f docker-compose.vlm-embed.yaml \ + --profile retrieval up ``` diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index d845918f6..f0832ce7d 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -74,3 +74,34 @@ Create secret to access docker registry {{- define "nv-ingest.ngcApiSecret" }} {{- printf "%s" .Values.ngcApiSecret.password }} {{- end }} + +{{/* +Resolve NIM image ref/repo/tag from release.yaml. + +release.yaml is intended to be passed as a Helm values file and contains: + x-nv-ingest-release: + nim: + <key>: + ref: "repo:tag" +*/}} + +{{- define "nv-ingest.release.nimRef" -}} +{{- $root := .root -}} +{{- $key := .key -}} +{{- $release := index $root.Values "x-nv-ingest-release" -}} +{{- if and $release (hasKey $release "nim") (hasKey (index $release "nim") $key) (hasKey (index (index $release "nim") $key) "ref") -}} +{{- index (index (index $release "nim") $key) "ref" -}} +{{- else -}} +{{- fail (printf "Missing required release.yaml value: x-nv-ingest-release.nim.%s.ref (pass -f release.yaml)" $key) -}} +{{- end -}} +{{- end -}} + +{{- define "nv-ingest.release.nimRepository" -}} +{{- $ref := include "nv-ingest.release.nimRef" . -}} +{{- regexReplaceAll ":([^:]*)$" $ref "" -}} +{{- end -}} + +{{- define "nv-ingest.release.nimTag" -}} +{{- $ref := include "nv-ingest.release.nimRef" . 
-}} +{{- regexReplaceAll "^.*:" $ref "" -}} +{{- end -}} diff --git a/helm/templates/audio.yaml b/helm/templates/audio.yaml index 39101da82..47647c16c 100644 --- a/helm/templates/audio.yaml +++ b/helm/templates/audio.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.audio.image.repository }}:{{ .Values.nimOperator.audio.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . "key" "audio") }}" pullSecret: "{{ index .Values.nimOperator.audio.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.audio.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: audio spec: image: - repository: {{ .Values.nimOperator.audio.image.repository }} - tag: {{ .Values.nimOperator.audio.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "audio") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . "key" "audio") }} pullPolicy: IfNotPresent pullSecrets: {{ toYaml .Values.nimOperator.audio.image.pullSecrets | nindent 6 }} @@ -33,7 +33,7 @@ spec: storage: nimCache: name: audio - replicas: {{ .Values.nimOperator.nemotron_parse.replicas }} + replicas: {{ .Values.nimOperator.audio.replicas }} resources: {{ toYaml .Values.nimOperator.audio.resources | nindent 4 }} tolerations: diff --git a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml index e4479ca88..6026173b7 100644 --- a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.embedqa.image.repository }}:{{ .Values.nimOperator.embedqa.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . 
"key" "embedqa") }}" pullSecret: "{{ index .Values.nimOperator.embedqa.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.embedqa.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: llama-32-nv-embedqa-1b-v2 spec: image: - repository: {{ .Values.nimOperator.embedqa.image.repository }} - tag: {{ .Values.nimOperator.embedqa.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "embedqa") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . "key" "embedqa") }} pullPolicy: {{ .Values.nimOperator.embedqa.image.pullPolicy }} pullSecrets: {{ toYaml .Values.nimOperator.embedqa.image.pullSecrets | nindent 6 }} diff --git a/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml b/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml index 066fb9215..f143f463c 100644 --- a/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.repository }}:{{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . "key" "llama_3_2_nv_rerankqa_1b_v2") }}" pullSecret: "{{ index .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: llama-32-nv-rerankqa-1b-v2 spec: image: - repository: {{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.repository }} - tag: {{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "llama_3_2_nv_rerankqa_1b_v2") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . 
"key" "llama_3_2_nv_rerankqa_1b_v2") }} pullPolicy: {{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.pullPolicy }} pullSecrets: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.image.pullSecrets | nindent 6 }} diff --git a/helm/templates/nemoretriever-graphic-elements-v1.yaml b/helm/templates/nemoretriever-graphic-elements-v1.yaml index 2c1e97555..b8df37517 100644 --- a/helm/templates/nemoretriever-graphic-elements-v1.yaml +++ b/helm/templates/nemoretriever-graphic-elements-v1.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.graphic_elements.image.repository }}:{{ .Values.nimOperator.graphic_elements.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . "key" "graphic_elements") }}" pullSecret: "{{ index .Values.nimOperator.graphic_elements.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.graphic_elements.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: nemoretriever-graphic-elements-v1 spec: image: - repository: {{ .Values.nimOperator.graphic_elements.image.repository }} - tag: {{ .Values.nimOperator.graphic_elements.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "graphic_elements") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . "key" "graphic_elements") }} pullPolicy: {{ .Values.nimOperator.graphic_elements.image.pullPolicy }} pullSecrets: {{ toYaml .Values.nimOperator.graphic_elements.image.pullSecrets | nindent 6 }} diff --git a/helm/templates/nemoretriever-ocr-v1.yaml b/helm/templates/nemoretriever-ocr-v1.yaml index 5cba31147..19a2e13ad 100644 --- a/helm/templates/nemoretriever-ocr-v1.yaml +++ b/helm/templates/nemoretriever-ocr-v1.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.nemoretriever_ocr_v1.image.repository }}:{{ .Values.nimOperator.nemoretriever_ocr_v1.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . 
"key" "nemoretriever_ocr_v1") }}" pullSecret: "{{ index .Values.nimOperator.nemoretriever_ocr_v1.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.nemoretriever_ocr_v1.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: nemoretriever-ocr-v1 spec: image: - repository: {{ .Values.nimOperator.nemoretriever_ocr_v1.image.repository }} - tag: {{ .Values.nimOperator.nemoretriever_ocr_v1.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "nemoretriever_ocr_v1") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . "key" "nemoretriever_ocr_v1") }} pullPolicy: {{ .Values.nimOperator.nemoretriever_ocr_v1.image.pullPolicy }} pullSecrets: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.image.pullSecrets | nindent 6 }} authSecret: {{ .Values.nimOperator.nemoretriever_ocr_v1.authSecret }} diff --git a/helm/templates/nemoretriever-page-elements-v3.yaml b/helm/templates/nemoretriever-page-elements-v3.yaml index a2567bbf4..1b87f4abb 100644 --- a/helm/templates/nemoretriever-page-elements-v3.yaml +++ b/helm/templates/nemoretriever-page-elements-v3.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.page_elements.image.repository }}:{{ .Values.nimOperator.page_elements.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . "key" "page_elements") }}" pullSecret: "{{ index .Values.nimOperator.page_elements.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.page_elements.authSecret }} storage: @@ -25,8 +25,8 @@ metadata: name: nemoretriever-page-elements-v3 spec: image: - repository: {{ .Values.nimOperator.page_elements.image.repository }} - tag: {{ .Values.nimOperator.page_elements.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "page_elements") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . 
"key" "page_elements") }} pullPolicy: {{ .Values.nimOperator.page_elements.image.pullPolicy }} pullSecrets: {{ toYaml .Values.nimOperator.page_elements.image.pullSecrets | nindent 6 }} authSecret: {{ .Values.nimOperator.page_elements.authSecret }} diff --git a/helm/templates/nemoretriever-table-structure-v1.yaml b/helm/templates/nemoretriever-table-structure-v1.yaml index 20fb7bb6f..1f420e777 100644 --- a/helm/templates/nemoretriever-table-structure-v1.yaml +++ b/helm/templates/nemoretriever-table-structure-v1.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.table_structure.image.repository }}:{{ .Values.nimOperator.table_structure.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . "key" "table_structure") }}" pullSecret: "{{ index .Values.nimOperator.table_structure.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.table_structure.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: nemoretriever-table-structure-v1 spec: image: - repository: {{ .Values.nimOperator.table_structure.image.repository }} - tag: {{ .Values.nimOperator.table_structure.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "table_structure") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . 
"key" "table_structure") }} pullPolicy: {{ .Values.nimOperator.table_structure.image.pullPolicy }} pullSecrets: {{ toYaml .Values.nimOperator.table_structure.image.pullSecrets | nindent 6 }} diff --git a/helm/templates/nemotron-nano-12b-v2-vl.yaml b/helm/templates/nemotron-nano-12b-v2-vl.yaml index 1b5c1ca9d..c6a5f833f 100644 --- a/helm/templates/nemotron-nano-12b-v2-vl.yaml +++ b/helm/templates/nemotron-nano-12b-v2-vl.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.nemotron_nano_12b_v2_vl.image.repository }}:{{ .Values.nimOperator.nemotron_nano_12b_v2_vl.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . "key" "nemotron_nano_12b_v2_vl") }}" pullSecret: "{{ index .Values.nimOperator.nemotron_nano_12b_v2_vl.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: nemotron-nano-12b-v2-vl spec: image: - repository: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.image.repository }} - tag: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "nemotron_nano_12b_v2_vl") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . "key" "nemotron_nano_12b_v2_vl") }} pullPolicy: IfNotPresent pullSecrets: {{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.image.pullSecrets | nindent 6 }} diff --git a/helm/templates/nemotron-parse.yaml b/helm/templates/nemotron-parse.yaml index 25f3e07ac..882c0e2ab 100644 --- a/helm/templates/nemotron-parse.yaml +++ b/helm/templates/nemotron-parse.yaml @@ -8,7 +8,7 @@ metadata: spec: source: ngc: - modelPuller: "{{ .Values.nimOperator.nemotron_parse.image.repository }}:{{ .Values.nimOperator.nemotron_parse.image.tag }}" + modelPuller: "{{ include "nv-ingest.release.nimRef" (dict "root" . 
"key" "nemotron_parse") }}" pullSecret: "{{ index .Values.nimOperator.nemotron_parse.image.pullSecrets 0 }}" authSecret: {{ .Values.nimOperator.nemotron_parse.authSecret }} storage: @@ -24,8 +24,8 @@ metadata: name: nemotron-parse spec: image: - repository: {{ .Values.nimOperator.nemotron_parse.image.repository }} - tag: {{ .Values.nimOperator.nemotron_parse.image.tag }} + repository: {{ include "nv-ingest.release.nimRepository" (dict "root" . "key" "nemotron_parse") }} + tag: {{ include "nv-ingest.release.nimTag" (dict "root" . "key" "nemotron_parse") }} pullPolicy: IfNotPresent pullSecrets: {{ toYaml .Values.nimOperator.nemotron_parse.image.pullSecrets | nindent 6 }} diff --git a/helm/values.yaml b/helm/values.yaml index 947ed4ecb..b1543742f 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -599,6 +599,9 @@ containerArgs: [] ## @section Nim Operator parameters ## @param nimCache.pvc.storageClass Specifies the storage class for the PVCs nimOperator: + # NOTE: NIM image versions are sourced from `release.yaml` (x-nv-ingest-release.*). + # To avoid drift between Helm and docker-compose, update versions in `release.yaml` + # and pass it to Helm: `helm upgrade --install ... -f helm/values.yaml -f release.yaml`. ## @section Nim Operator NimCache Configuration ## @param nimOperator.nimCache.pvc.storageClass [string] Storage class to use for all NimCache PVCs. Overrides per-model storageClass if set. nimCache: @@ -845,8 +848,8 @@ nimOperator: nemoretriever_ocr_v1: enabled: true image: - repository: nvcr.io/nvidia/nemo-microservices/nemoretriever-ocr-v1 - tag: 1.1.0 + repository: nvcr.io/nim/nvidia/nemoretriever-ocr-v1 + tag: 1.2.1 pullPolicy: IfNotPresent pullSecrets: - ngc-secret diff --git a/release.yaml b/release.yaml new file mode 100644 index 000000000..f12297037 --- /dev/null +++ b/release.yaml @@ -0,0 +1,54 @@ +# +# Single source of truth for NIM image versions. 
+# +# This file is intentionally BOTH: +# - a docker-compose override file (top-level `services:`) +# - a Helm values override file (top-level `x-nv-ingest-release:`) +# +# Usage: +# - Docker Compose: `docker compose -f docker-compose.yaml -f release.yaml up` +# - Helm: `helm upgrade --install nv-ingest ./helm -f helm/values.yaml -f release.yaml` +# + +x-nv-ingest-release: + nim: + # Keys here match Helm's `.Values.nimOperator.<key>` entries. + page_elements: + ref: &page_elements_ref "nvcr.io/nim/nvidia/nemoretriever-page-elements-v3:1.7.0" + graphic_elements: + ref: &graphic_elements_ref "nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1:1.6.0" + table_structure: + ref: &table_structure_ref "nvcr.io/nim/nvidia/nemoretriever-table-structure-v1:1.6.0" + nemoretriever_ocr_v1: + ref: &ocr_ref "nvcr.io/nim/nvidia/nemoretriever-ocr-v1:1.2.1" + embedqa: + ref: &embedqa_ref "nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1" + llama_3_2_nv_rerankqa_1b_v2: + ref: &rerankqa_ref "nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:1.8.0" + nemotron_parse: + ref: &nemotron_parse_ref "nvcr.io/nim/nvidia/nemotron-parse:1.5.0" + nemotron_nano_12b_v2_vl: + ref: &nemotron_nano_12b_v2_vl_ref "nvcr.io/nim/nvidia/nemotron-nano-12b-v2-vl:1.5.0" + audio: + ref: &audio_ref "nvcr.io/nim/nvidia/parakeet-1-1b-ctc-en-us:1.4.0" + +services: + # These service names match `docker-compose.yaml`. 
+ page-elements: + image: *page_elements_ref + graphic-elements: + image: *graphic_elements_ref + table-structure: + image: *table_structure_ref + ocr: + image: *ocr_ref + embedding: + image: *embedqa_ref + reranker: + image: *rerankqa_ref + nemotron-parse: + image: *nemotron_parse_ref + vlm: + image: *nemotron_nano_12b_v2_vl_ref + audio: + image: *audio_ref diff --git a/tools/harness/plans/SERVICE_MANAGER.md b/tools/harness/plans/SERVICE_MANAGER.md index aaeb70bd6..c845e4e39 100644 --- a/tools/harness/plans/SERVICE_MANAGER.md +++ b/tools/harness/plans/SERVICE_MANAGER.md @@ -92,7 +92,7 @@ CLI (run.py/nightly.py) └─> create_service_manager(config, repo_root, sku) └─> DockerComposeManager(config, repo_root, sku) └─> Check: docker-compose.a10g.yaml exists? - └─> _build_compose_cmd() adds: -f docker-compose.yaml -f docker-compose.a10g.yaml + └─> _build_compose_cmd() adds: -f docker-compose.yaml -f release.yaml -f docker-compose.a10g.yaml └─> Used by: start(), stop(), dump_logs() ``` diff --git a/tools/harness/src/nv_ingest_harness/service_manager/docker_compose.py b/tools/harness/src/nv_ingest_harness/service_manager/docker_compose.py index 0a0a1285a..4d042e526 100644 --- a/tools/harness/src/nv_ingest_harness/service_manager/docker_compose.py +++ b/tools/harness/src/nv_ingest_harness/service_manager/docker_compose.py @@ -23,6 +23,7 @@ def __init__(self, config, repo_root: Path, sku: str | None = None): """ super().__init__(config, repo_root) self.compose_file = str(repo_root / "docker-compose.yaml") + self.release_file = str(repo_root / "release.yaml") self.sku = sku self.override_file = None @@ -46,6 +47,13 @@ def _build_compose_cmd(self, base_cmd: list[str]) -> list[str]: Command list with -f flags for compose file(s) """ cmd = base_cmd + ["-f", self.compose_file] + + # Always include release.yaml so NIM images stay in sync with Helm. 
+ if Path(self.release_file).exists(): + cmd += ["-f", self.release_file] + else: + print(f"Warning: release.yaml not found at {self.release_file} (NIM images may be unset)") + if self.override_file: cmd += ["-f", self.override_file] return cmd diff --git a/tools/harness/src/nv_ingest_harness/service_manager/helm.py b/tools/harness/src/nv_ingest_harness/service_manager/helm.py index 40a00ccd3..654d20360 100644 --- a/tools/harness/src/nv_ingest_harness/service_manager/helm.py +++ b/tools/harness/src/nv_ingest_harness/service_manager/helm.py @@ -137,32 +137,28 @@ def start(self, no_build: bool = False) -> int: if self.chart_version: cmd += ["--version", self.chart_version] - # Parse and add values from YAML file if specified + # Values files + # - Always include release.yaml (if present) so NIM image versions match docker-compose. + # - Include helm/values.yaml by default when using the local chart. + local_chart_path = (self.repo_root / "helm").resolve() + is_local_chart = Path(self.chart_ref).resolve() == local_chart_path if Path(self.chart_ref).exists() else False + + # User-specified values file (relative to repo root) if self.values_file: values_path = self.repo_root / self.values_file if values_path.exists(): - try: - print(f"Loading values from {values_path}...") - with open(values_path, "r") as f: - values_data = yaml.safe_load(f) - - if values_data: - # Flatten the YAML structure - flattened_values = self._flatten_dict(values_data) - print(f"Parsed {len(flattened_values)} value(s) from {self.values_file}") - - # Add to command using --set and --set-json - cmd = self._add_values_to_command(cmd, flattened_values) - else: - print(f"Warning: Values file {values_path} is empty") - except yaml.YAMLError as e: - print(f"Error: Failed to parse YAML file {values_path}: {e}") - return 1 - except Exception as e: - print(f"Error: Failed to read values file {values_path}: {e}") - return 1 + cmd += ["-f", str(values_path)] else: print(f"Warning: Values file {values_path} 
not found, skipping") + elif is_local_chart: + # Default for local chart + cmd += ["-f", str(local_chart_path / "values.yaml")] + + release_values_path = self.repo_root / "release.yaml" + if release_values_path.exists(): + cmd += ["-f", str(release_values_path)] + else: + print(f"Warning: release.yaml not found at {release_values_path} (NIM images may be unset)") # Add inline values from config if hasattr(self.config, "helm_values") and self.config.helm_values: