-
Notifications
You must be signed in to change notification settings - Fork 308
Expand file tree
/
Copy pathvalues.yaml
More file actions
1044 lines (965 loc) · 43 KB
/
values.yaml
File metadata and controls
1044 lines (965 loc) · 43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## @section Deployment parameters
## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
## @param affinity [object] [default: {}] Affinity settings for deployment.
affinity: {}
## @param nodeSelector [object] Sets node selectors for the NIM -- for example `nvidia.com/gpu.present: "true"`
nodeSelector: {}
## @param logLevel Log level of NVIngest service. Possible values of the variable are TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL.
logLevel: DEFAULT
## @param extraEnvVarsCM [string] [default: ""] A Config map holding Environment variables to include in the NVIngest container
extraEnvVarsCM: ""
## @param extraEnvVarsSecret [string] [default: ""] A K8S Secret to map to Environment variables to include in the NVIngest container
extraEnvVarsSecret: ""
## @param fullnameOverride [string] [default: ""] A name to force the fullname of the NVIngest container to have, defaults to the Helm Release Name
fullnameOverride: ""
## @param nameOverride [string] [default: ""] A name to base the objects created by this helm chart
nameOverride: ""
## @section Image Configuration
## @param image.repository [string] NIM Image Repository
## @param image.tag [string] Image tag or version
## @param image.pullPolicy [string] Image pull policy
image:
pullPolicy: IfNotPresent
repository: "nvcr.io/nvidia/nemo-microservices/nv-ingest"
tag: "26.03.0-RC2"
## @section Pod Configuration
## @param podAnnotations [object] Sets additional annotations on the main deployment pods
podAnnotations:
traffic.sidecar.istio.io/excludeOutboundPorts: '8007'
## @param podLabels [object] Specify extra labels to be added to deployed pods.
podLabels: {}
## @param podSecurityContext.fsGroup Specify file system owner group id.
podSecurityContext:
fsGroup: 1000
## @param extraVolumes [object] Adds arbitrary additional volumes to the deployment set definition
extraVolumes: {}
## @param extraVolumeMounts [object] Specify volume mounts to the main container from `extraVolumes`
extraVolumeMounts: {}
## @section Image Pull Secrets
## @param imagePullSecrets[].name List of secret names needed for the main container and any init containers
imagePullSecrets:
- name: ngc-api
- name: ngc-secret
## @param containerSecurityContext [object] Sets privilege and access control settings for container (Only affects the main container, not pod-level)
containerSecurityContext: {}
## @param tolerations [array] Specify tolerations for pod assignment. Allows the scheduler to schedule pods with matching taints.
tolerations: []
## @param replicaCount [default: 1] The number of replicas for NVIngest when autoscaling is disabled
replicaCount: 1
## @section Resource Configuration
## @param resources.limits.memory [default: 200Gi] Specify limit for memory
## @param resources.limits.cpu [default: "48000m"] Specify limit for CPU
## @param resources.requests.memory [default: 24Gi] Specify request for memory
## @param resources.requests.cpu [default: "24000m"] Specify request for CPU
resources:
limits: {}
requests:
memory: 24Gi
cpu: "24000m"
## @section Autoscaling parameters
## @descriptionStart
## Values used for creating a `Horizontal Pod Autoscaler`. If autoscaling is not enabled, the rest are ignored.
## NVIDIA recommends usage of the custom metrics API, commonly implemented with the prometheus-adapter.
## Standard metrics of CPU and memory are of limited use in scaling NIM.
## @descriptionEnd
## @param autoscaling.enabled Enables horizontal pod autoscaler.
## @param autoscaling.minReplicas Specify minimum replicas for autoscaling.
## @param autoscaling.maxReplicas Specify maximum replicas for autoscaling.
## @param autoscaling.metrics Array of metrics for autoscaling.
autoscaling:
enabled: false
maxReplicas: 100
minReplicas: 1
metrics: []
## @param tmpDirSize [default: 50Gi] Specify the amount of space to reserve for temporary storage
tmpDirSize: 50Gi
## @section Environment Variables
## @descriptionStart
## Define environment variables as key/value dictionary pairs
## @descriptionEnd
## @param envVars [default: sane {}] Adds arbitrary environment variables to the main container using key-value pairs, for example NAME: value
## @param envVars.ARROW_DEFAULT_MEMORY_POOL [default: "system"] Memory pool configuration for Apache Arrow
## @param envVars.INGEST_LOG_LEVEL [default: "DEFAULT"] Log level for the ingest service
## @param envVars.INGEST_EDGE_BUFFER_SIZE [default: "64"] Size of the edge buffer for ingestion
## @param envVars.INGEST_DYNAMIC_MEMORY_THRESHOLD [default: "0.80"] Dynamic memory threshold for ingestion
## @param envVars.MAX_INGEST_PROCESS_WORKERS [default: "32"] Maximum Ingestion worker processes
## @param envVars.MESSAGE_CLIENT_HOST [default: "nv-ingest-redis-master"] Override this value to specify a differing REST endpoint host
## @param envVars.MESSAGE_CLIENT_PORT [default: "6379"] Override this value to specify a differing REST endpoint port
## @param envVars.MESSAGE_CLIENT_TYPE [default: "redis"] Type of message client to use
## @param envVars.REDIS_INGEST_TASK_QUEUE [default: "ingest_task_queue"] Name of the Redis queue for ingest tasks
## @param envVars.REDIS_POOL_SIZE [default: "50"] Maximum Redis connection pool size. Increase for high-concurrency workloads (e.g., 100-200 for large batch jobs)
## @param envVars.NV_INGEST_DEFAULT_TIMEOUT_MS [default: "1234"] Override the Timeout of the NVIngest requests
## @param envVars.NV_INGEST_MAX_UTIL [default: "32"] Maximum number of CPU cores to utilize for processing. Defaults to number of available CPU cores if not set
## @param envVars.MINIO_INTERNAL_ADDRESS [default: "nv-ingest-minio:9000"] Override this to the cluster local DNS name of minio
## @param envVars.MINIO_PUBLIC_ADDRESS [default: "http://localhost:9000"] Override this to publicly routable minio address, default assumes port-forwarding
## @param envVars.MINIO_BUCKET [default: "nv-ingest"] Override this for specific minio bucket to upload extracted images to
## @param envVars.MINIO_ACCESS_KEY [default: "minioadmin"] MinIO access key for authentication
## @param envVars.MINIO_SECRET_KEY [default: "minioadmin"] MinIO secret key for authentication
## @param envVars.IMAGE_STORAGE_URI [default: "s3://nv-ingest/artifacts/store/images"] fsspec-compatible destination for stored images
## @param envVars.IMAGE_STORAGE_PUBLIC_BASE_URL [default: ""] Optional HTTP base URL for serving stored images when using object storage
## @param envVars.NEMOTRON_PARSE_HTTP_ENDPOINT [default: "http://nemotron-parse:8000/v1/chat/completions"] HTTP endpoint for Nemotron Parse service
## @param envVars.NEMOTRON_PARSE_INFER_PROTOCOL [default: "http"] Protocol for Nemotron Parse service
## @param envVars.NEMOTRON_PARSE_MODEL_NAME [default: "nvidia/nemotron-parse"] Model name for Nemotron Parse
## @param envVars.YOLOX_GRPC_ENDPOINT [default: "nemotron-page-elements-v3:8001"] gRPC endpoint for YOLOX page elements
## @param envVars.YOLOX_HTTP_ENDPOINT [default: "http://nemotron-page-elements-v3:8000/v1/infer"] HTTP endpoint for YOLOX page elements
## @param envVars.YOLOX_INFER_PROTOCOL [default: "grpc"] Protocol for YOLOX page elements
## @param envVars.YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT [default: "nemotron-graphic-elements-v1:8001"] gRPC endpoint for YOLOX graphic elements
## @param envVars.YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT [default: "http://nemotron-graphic-elements-v1:8000/v1/infer"] HTTP endpoint for YOLOX graphic elements
## @param envVars.YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL [default: "grpc"] Protocol for YOLOX graphic elements
## @param envVars.YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT [default: "nemotron-table-structure-v1:8001"] gRPC endpoint for YOLOX table structure
## @param envVars.YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT [default: "http://nemotron-table-structure-v1:8000/v1/infer"] HTTP endpoint for YOLOX table structure
## @param envVars.YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL [default: "grpc"] Protocol for YOLOX table structure
## @param envVars.EMBEDDING_NIM_ENDPOINT [default: "http://nv-ingest-embedqa:8000/v1"] Endpoint for embedding service
## @param envVars.EMBEDDING_NIM_MODEL_NAME [default: "nvidia/llama-nemotron-embed-1b-v2"] Model name for embedding service
## @param envVars.MILVUS_ENDPOINT [default: "http://nv-ingest-milvus:19530"] Endpoint for Milvus vector database
## @param envVars.VLM_CAPTION_ENDPOINT [default: "https://integrate.api.nvidia.com/v1/chat/completions"] Endpoint for VLM caption service
## @param envVars.VLM_CAPTION_MODEL_NAME [default: "nvidia/nemotron-nano-12b-v2-vl"] Model name for VLM caption service
## @param envVars.VLM_CAPTION_PROMPT [default: "Caption the content of this image:"] Override caption prompt sent as the user message
## @param envVars.VLM_CAPTION_SYSTEM_PROMPT [default: "/no_think"] Override system prompt for reasoning toggle (/think or /no_think)
## @param envVars.AUDIO_GRPC_ENDPOINT [default: "audio:50051"] gRPC endpoint for audio service
## @param envVars.AUDIO_INFER_PROTOCOL [default: "grpc"] Protocol for audio service
## @param envVars.COMPONENTS_TO_READY_CHECK [default: "ALL"] Components to check during readiness probe
## @param envVars.MODEL_PREDOWNLOAD_PATH [default: "/workspace/models/"] Path for pre-downloading models
envVars:
INGEST_LOG_LEVEL: "DEFAULT"
ARROW_DEFAULT_MEMORY_POOL: "system"
OMP_NUM_THREADS: "1"
RAY_num_grpc_threads: "1"
RAY_num_server_call_thread: "1"
RAY_worker_num_grpc_internal_threads: "1"
MAX_INGEST_PROCESS_WORKERS: "32"
NV_INGEST_MAX_UTIL: "32"
INGEST_DYNAMIC_MEMORY_THRESHOLD: "0.80"
INGEST_EDGE_BUFFER_SIZE: "64"
INGEST_DISABLE_DYNAMIC_SCALING: "true"
MESSAGE_CLIENT_HOST: "nv-ingest-redis-master"
MESSAGE_CLIENT_PORT: "6379"
MESSAGE_CLIENT_TYPE: "redis"
# Image Storage Configuration
# Provide a single fsspec-compatible URI (file://, s3://, etc.)
# Uses existing volume mounts for file:// paths i.e. file:///workspace/data/artifacts/store/images
IMAGE_STORAGE_URI: "s3://nv-ingest/artifacts/store/images"
# Optional public base URL for rendering object storage assets
IMAGE_STORAGE_PUBLIC_BASE_URL: ""
AUDIO_GRPC_ENDPOINT: "audio:50051"
AUDIO_INFER_PROTOCOL: "grpc"
EMBEDDING_NIM_ENDPOINT: "http://llama-nemotron-embed-1b-v2:8000/v1"
EMBEDDING_NIM_MODEL_NAME: "nvidia/llama-nemotron-embed-1b-v2"
NEMOTRON_PARSE_HTTP_ENDPOINT: http://nemotron-parse:8000/v1/chat/completions
NEMOTRON_PARSE_INFER_PROTOCOL: http
NEMOTRON_PARSE_MODEL_NAME: nvidia/nemotron-parse
YOLOX_PAGE_IMAGE_FORMAT: JPEG
YOLOX_GRPC_ENDPOINT: nemotron-page-elements-v3:8001
YOLOX_HTTP_ENDPOINT: http://nemotron-page-elements-v3:8000/v1/infer
YOLOX_INFER_PROTOCOL: grpc
YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT: nemotron-graphic-elements-v1:8001
YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT: http://nemotron-graphic-elements-v1:8000/v1/infer
YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL: grpc
YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT: nemotron-table-structure-v1:8001
YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT: http://nemotron-table-structure-v1:8000/v1/infer
YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL: grpc
OCR_GRPC_ENDPOINT: nemotron-ocr-v1:8001
OCR_HTTP_ENDPOINT: http://nemotron-ocr-v1:8000/v1/infer
OCR_INFER_PROTOCOL: grpc
OCR_MODEL_NAME: pipeline
MINIO_BUCKET: nv-ingest
MINIO_ACCESS_KEY: minioadmin
MINIO_SECRET_KEY: minioadmin
MINIO_INTERNAL_ADDRESS: nv-ingest-minio:9000
MINIO_PUBLIC_ADDRESS: http://localhost:9000
MILVUS_ENDPOINT: "http://nv-ingest-milvus:19530"
# This environment variable is controlled in helm/templates/deployment.yaml. It is set to "http://nemotron-nano-12b-v2-vl:8000/v1/chat/completions"
# to use the locally deployed NIM if nimOperator.nemotron_nano_12b_v2_vl.enabled is true.
# To use the NGC hosted NIM (Build API), manually set this value to "https://integrate.api.nvidia.com/v1/chat/completions".
# VLM_CAPTION_ENDPOINT: "DO_NOT_USE_HERE_FOR_REFERENCE_ONLY"
VLM_CAPTION_MODEL_NAME: "nvidia/nemotron-nano-12b-v2-vl"
# "ready" check configuration.
# 1. COMPONENTS_TO_READY_CHECK= to disable any readiness checking
# 2. COMPONENTS_TO_READY_CHECK=ALL for checking all services
# 3. COMPONENTS_TO_READY_CHECK=YOLOX_HTTP_ENDPOINT, OCR_HTTP_ENDPOINT
# comma separated list of HTTP environment variables for specific services to check for ready
COMPONENTS_TO_READY_CHECK: "ALL"
MODEL_PREDOWNLOAD_PATH: "/workspace/models/"
## @section Milvus Deployment parameters
## @descriptionStart
## NVIngest uses Milvus and Minio to store extracted images from a document
## This chart by default sets up a Milvus standalone instance in the namespace using the
## Helm chart found at https://artifacthub.io/packages/helm/milvus-helm/milvus
## @descriptionEnd
## @param milvusDeployed [default: true] Whether to deploy Milvus and Minio from this helm chart
milvusDeployed: true
## @section Milvus parameters
## @descriptionStart
## Milvus is used as the vector database for storing and searching embeddings.
## The chart uses the official Milvus Helm chart found at https://artifacthub.io/packages/helm/milvus-helm/milvus
## By default this deploys Milvus in standalone mode with GPU support.
## @descriptionEnd
## @param milvus.image.all.repository [default: milvusdb/milvus] The Milvus container image repository
## @param milvus.image.all.tag [default: v2.5.3-gpu] The Milvus container image tag
## @param milvus.cluster.enabled [default: false] Whether to deploy Milvus in cluster mode
## @param milvus.standalone.resources.limits.nvidia.com/gpu [default: 1] Number of GPUs to allocate to Milvus
## @param milvus.standalone.persistence.persistentVolumeClaim.size [default: 50Gi] Size of the PVC for Milvus data
## @param milvus.standalone.persistence.persistentVolumeClaim.storageClass Storage class to use for the PVC
## @param milvus.minio.mode [default: standalone] MinIO deployment mode
## @param milvus.minio.bucketName [default: nv-ingest] Name of the MinIO bucket to create
## @param milvus.minio.persistence.size [default: 50Gi] Size of the PVC for MinIO data
## @param milvus.minio.persistence.storageClass Storage class to use for MinIO PVC
milvus:
image:
all:
repository: milvusdb/milvus
tag: v2.6.5-gpu
cluster:
enabled: false
etcd:
image:
repository: "milvusdb/etcd"
tag: "3.5.23-r2"
replicaCount: 1
extraVolumes: []
extraVolumeMounts: []
persistence:
storageClass: null
minio:
enabled: true
mode: standalone
image:
tag: "RELEASE.2025-09-07T16-13-09Z"
bucketName: nv-ingest
persistence:
size: 50Gi
storageClass: null
pulsarv3:
enabled: false
pulsar:
enabled: false
standalone:
resources:
limits:
nvidia.com/gpu: 1
persistence:
persistentVolumeClaim:
size: 50Gi
storageClass: null
extraEnv:
- name: LOG_LEVEL
value: error
## @section Redis configurations
## @descriptionStart
## Include any redis configuration that you'd like with the deployed Redis
## Find values at https://github.com/bitnami/charts/tree/main/bitnami/redis
## @descriptionEnd
## @param redisDeployed [default: true] Whether to deploy Redis from this helm chart
redisDeployed: true
## @section Redis parameters
## @descriptionStart
## Configure Redis settings for the deployment. These values are passed directly to the Redis Helm chart.
## For a complete list of configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/redis
## @descriptionEnd
## @param redis Redis configuration options
## @param redis.auth.enabled [default: false] Enable Redis authentication
## @param redis.replica.replicaCount [default: 1] Number of Redis replicas
## @param redis.replica.persistence.size [default: "50Gi"] Size of persistent volume for Redis replicas
## @param redis.replica.resources.requests.memory [default: "6Gi"] Memory requests for Redis replicas
## @param redis.replica.resources.limits.memory [default: "12Gi"] Memory limits for Redis replicas
## @param redis.master.persistence.size [default: "50Gi"] Size of persistent volume for Redis master
## @param redis.master.resources.requests.memory [default: "6Gi"] Memory requests for Redis master
## @param redis.master.resources.limits.memory [default: "12Gi"] Memory limits for Redis master
## @param redis.master.configmap [default: "protected-mode no"] Redis master configuration
redis:
image:
repository: redis
tag: "8.2.3"
auth:
enabled: false
replica:
replicaCount: 1
persistence:
size: "50Gi"
resources:
requests:
memory: "6Gi"
limits:
memory: "12Gi"
master:
persistence:
size: "50Gi"
resources:
requests:
memory: "6Gi"
limits:
memory: "12Gi"
configmap: |-
protected-mode no
## @section Prometheus configurations
## @descriptionStart
## Include any Prometheus configuration that you'd like with the deployed Prometheus
## For a complete list of configuration options see: https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus/values.yaml
## @descriptionEnd
## @param prometheus Prometheus configuration options
## @param prometheus.enabled [default: true] Whether to deploy Prometheus from this helm chart
## @param prometheus.server.enabled [default: true] Enable the Prometheus server
## @param prometheus.alertmanager.enabled [default: false] Enable the Prometheus alertmanager
prometheus:
enabled: false
server:
enabled: false
alertmanager:
enabled: false
## @section Open Telemetry
## @descriptionStart
## Define environment variables as key/value dictionary pairs for configuring OTEL Deployments
## A sane set of parameters is set for the deployed version of OpenTelemetry with this Helm Chart.
## Override any values to the Open Telemetry helm chart by overriding the `open-telemetry` value.
## @descriptionEnd
## @param otelEnabled [default: true] Whether to enable OTEL collection
otelEnabled: true
## @param otelDeployed [default: true] Whether to deploy OTEL from this helm chart
otelDeployed: true
## @skip opentelemetry-collector
## @extra opentelemetry-collector [default: sane {}] Configures the opentelemetry helm chart - see https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-collector/values.yaml
opentelemetry-collector:
image:
repository: "otel/opentelemetry-collector-contrib"
tag: "0.140.0"
mode: deployment
config:
receivers:
otlp:
protocols:
grpc:
endpoint: "${env:MY_POD_IP}:4317"
http:
endpoint: "${env:MY_POD_IP}:4318"
cors:
allowed_origins: ["*"]
exporters:
zipkin:
endpoint: "http://nv-ingest-zipkin:9411/api/v2/spans"
debug:
verbosity: detailed
extensions:
health_check: {}
zpages:
endpoint: 0.0.0.0:55679
processors:
batch: {}
tail_sampling:
policies:
- name: drop_noisy_traces_url
type: string_attribute
string_attribute:
key: http.target
values: ["/health"]
enabled_regex_matching: true
invert_match: true
transform:
trace_statements:
- context: span
statements:
- set(status.code, 1) where attributes["http.path"] == "/health"
# replace aspects of the span after anonymization
- replace_match(attributes["http.route"], "/v1", attributes["http.target"]) where attributes["http.target"] != nil
- replace_pattern(name, "/v1", attributes["http.route"]) where attributes["http.route"] != nil
- set(name, Concat([name, attributes["http.url"]], " ")) where name == "POST"
service:
extensions: [zpages, health_check]
pipelines:
traces:
receivers: [otlp]
processors: [batch, tail_sampling, transform]
exporters: [debug, zipkin]
metrics:
receivers: [otlp]
processors: [batch]
exporters: [debug]
logs:
receivers: [otlp]
processors: [batch]
exporters: [debug]
## @param otelEnvVars [default: sane {}] Adds arbitrary environment variables for configuring OTEL using key-value pairs, for example NAME: value
## @extra otelEnvVars.OTEL_EXPORTER_OTLP_ENDPOINT Default deployment target for GRPC otel - Default "http://{{ .Release.Name }}-opentelemetry-collector:4317"
## @param otelEnvVars.OTEL_SERVICE_NAME [default: "nemo-retrieval-service" ]
## @param otelEnvVars.OTEL_TRACES_EXPORTER [default: "otlp" ]
## @param otelEnvVars.OTEL_METRICS_EXPORTER [default: "otlp" ]
## @param otelEnvVars.OTEL_LOGS_EXPORTER [default: "none" ]
## @param otelEnvVars.OTEL_PROPAGATORS [default: "tracecontext baggage" ]
## @param otelEnvVars.OTEL_RESOURCE_ATTRIBUTES [default: "deployment.environment=$(NAMESPACE)" ]
## @param otelEnvVars.OTEL_PYTHON_EXCLUDED_URLS [default: "health" ]
otelEnvVars:
# OpenTelemetry
OTEL_SERVICE_NAME: "nemo-retrieval-service"
OTEL_TRACES_EXPORTER: otlp
OTEL_METRICS_EXPORTER: otlp
OTEL_LOGS_EXPORTER: none
OTEL_PROPAGATORS: "tracecontext,baggage"
OTEL_RESOURCE_ATTRIBUTES: "deployment.environment=$(NAMESPACE)"
OTEL_PYTHON_EXCLUDED_URLS: "health"
## @param zipkinDeployed [default: true] Whether to deploy Zipkin with OpenTelemetry from this helm chart
zipkinDeployed: true
zipkin:
image:
repository: "openzipkin/zipkin"
tag: "3.5.0"
zipkin:
extraEnv:
JAVA_OPTS: "-Xms2g -Xmx4g -XX:+ExitOnOutOfMemoryError"
resources:
limits:
cpu: 500m
memory: 4.5Gi
requests:
cpu: 100m
memory: 2.5Gi
## @section Ingress parameters
## @param ingress.hosts[].host Specify name of host.
## @param ingress.hosts[].paths[].path Specify ingress path.
## @param ingress.hosts[].paths[].pathType Specify path type.
## @param ingress.tls Specify list of pairs of TLS `secretName` and hosts.
ingress:
enabled: false
className: ""
annotations: {}
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
## @section Probe parameters
## @param livenessProbe.enabled Enables `livenessProbe`
## @param livenessProbe.httpGet.path `LivenessProbe` endpoint path
## @param livenessProbe.httpGet.port `LivenessProbe` endpoint port
## @param livenessProbe.initialDelaySeconds Initial delay seconds for `livenessProbe`
## @param livenessProbe.timeoutSeconds Timeout seconds for `livenessProbe`
## @param livenessProbe.periodSeconds Period seconds for `livenessProbe`
## @param livenessProbe.successThreshold Success threshold for `livenessProbe`
## @param livenessProbe.failureThreshold Failure threshold for `livenessProbe`
livenessProbe:
enabled: false
httpGet:
path: /v1/health/live
port: http
initialDelaySeconds: 120
periodSeconds: 10
timeoutSeconds: 20
failureThreshold: 20
successThreshold: 1
## @section Probe parameters
## @param readinessProbe.enabled Enables `readinessProbe`
## @param readinessProbe.httpGet.path `ReadinessProbe` endpoint path
## @param readinessProbe.httpGet.port `ReadinessProbe` endpoint port
## @param readinessProbe.initialDelaySeconds Initial delay seconds for `readinessProbe`
## @param readinessProbe.timeoutSeconds Timeout seconds for `readinessProbe`
## @param readinessProbe.periodSeconds Period seconds for `readinessProbe`
## @param readinessProbe.successThreshold Success threshold for `readinessProbe`
## @param readinessProbe.failureThreshold Failure threshold for `readinessProbe`
readinessProbe:
enabled: true
httpGet:
path: /v1/health/ready
port: http
initialDelaySeconds: 120
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 220
successThreshold: 1
## @section Service parameters
## @param service.type Specifies the service type for the deployment.
## @param service.name Overrides the default service name
## @param service.port Specifies the HTTP Port for the service.
## @param service.nodePort Specifies an optional HTTP Node Port for the service.
## @param service.annotations [object] Specify additional annotations to be added to service.
## @param service.labels [object] Specifies additional labels to be added to service.
service:
type: ClusterIP
port: 7670
annotations: {}
labels: {}
name: "" # override the default service name
nodePort: null
## @section Service Account
## @param serviceAccount.create Specifies whether a service account should be created.
## @param serviceAccount.annotations [object] Sets annotations to be added to the service account.
## @param serviceAccount.name Specifies the name of the service account to use.
## @param serviceAccount.automount [default: true] Specifies whether to automatically mount the service account token.
serviceAccount:
annotations: {}
automount: true
create: true
name: ""
## @section Secret Creation
## @descriptionStart
## Manage the creation of secrets used by the helm chart
## @descriptionEnd
# ngcApi:
# # If set to false, the chart expects a secret with the name
# create: false
# password: ""
## @param ngcApiSecret.create Specifies whether to create the ngc api secret
## @param ngcApiSecret.password The password to use for the NGC Secret
ngcApiSecret:
# If set to false, the chart expects a secret with name ngc-api to exist in the namespace
# if credentials are needed.
create: false
password: ""
## @param ngcImagePullSecret.create Specifies whether to create the NVCR Image Pull secret
## @param ngcImagePullSecret.password The password to use for the NVCR Image Pull Secret
## @param ngcImagePullSecret.registry [default: "nvcr.io"] The registry URL
## @param ngcImagePullSecret.name [default: "ngcImagePullSecret"] The name of the secret
## @param ngcImagePullSecret.username [default: "$oauthtoken"] The username for the registry
ngcImagePullSecret:
create: false
# Leave blank, if no imagePullSecret is needed.
registry: "nvcr.io"
name: "ngcImagePullSecret"
# If set to false, the chart expects either an imagePullSecret
# with the name configured above to be present on the cluster or that no
# credentials are needed.
username: '$oauthtoken'
password: ""
## @section Container Configuration
## @param nemo.userID [default: "1000"] User ID for the NEMO container
## @param nemo.groupID [default: "1000"] Group ID for the NEMO container
## @param containerArgs [array] Additional arguments to pass to the container
nemo:
userID: "1000"
groupID: "1000"
## @param containerArgs [array] Additional arguments to pass to the container
containerArgs: []
## @section Nim Operator parameters
## @param nimOperator.nimCache.pvc.storageClass Specifies the storage class for the PVCs
nimOperator:
## @section Nim Operator NimCache Configuration
## @param nimOperator.nimCache.pvc.storageClass [string] Storage class to use for all NimCache PVCs. Overrides per-model storageClass if set.
nimCache:
pvc:
create: true
# If specified, sets the storageClass for all NIMCache PVCS created by the Helm chart.
storageClass: "default"
size: "25Gi"
volumeAccessMode: ReadWriteOnce
## @section Nim Operator NimService Configuration
## @param nimOperator.nimService.enabled [bool] Whether to deploy NimServices as part of operator
## @param nimOperator.nimService.namespaces [list] List of Kubernetes namespaces where NimServices should be created (defaults to Helm namespace if empty)
## @param nimOperator.nimService.resources [object] Override resources for NimServices, if desired
nimService:
namespaces: []
resources: {}
## @section NIM Services Configuration
##
## Define each NIM service instance used by nv-ingest and its configuration.
## You can enable/disable, set resource limits, storage, image, and environment variables for each NIM.
##
## @param page_elements [object] Configuration for Page Elements NIM
## @param page_elements.enabled [bool] Enable the Page Elements NIM service
## @param page_elements.image.repository [string] Image repository for the NIM
## @param page_elements.image.tag [string] Image tag for the NIM
## @param page_elements.image.pullPolicy [string] Kubernetes pull policy for the image
## @param page_elements.image.pullSecrets [array] List of imagePullSecrets for NGC
## @param page_elements.authSecret [string] Name of the secret to use for authentication
## @param page_elements.storage.pvc.create [bool] Whether to create a new PVC for storage
## @param page_elements.storage.pvc.size [string] Size of the persistent storage
## @param page_elements.storage.pvc.volumeAccessMode [string] PVC access mode
## @param page_elements.replicas [int] Number of replicas to deploy
## @param page_elements.resources [object] Resource requests/limits for the deployment
## @param page_elements.expose.service.type [string] Kubernetes Service type
## @param page_elements.expose.service.port [int] HTTP port for the service
## @param page_elements.expose.service.grpcPort [int] gRPC port for the service
## @param page_elements.env [array] Additional environment variables for the container
page_elements:
  enabled: true
  image:
    repository: nvcr.io/nim/nvidia/nemotron-page-elements-v3
    tag: 1.8.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  storage:
    # Defaults to inherit from nimOperator.nimCache.pvc.storageClass, size, and volumeAccessMode
    pvc:
      create: true
      size: "25Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # FIX: `service` was declared twice under `expose` — YAML last-key-wins would
  # silently drop `type` and `port`, leaving only `grpcPort`. Merged into one
  # mapping, matching every other NIM section in this file.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
    - name: NIM_TRITON_MAX_BATCH_SIZE
      value: "32"
    # FIX: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR was listed twice (same value);
    # duplicate entry removed.
    - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR
      value: "2"
    - name: OMP_NUM_THREADS
      value: "2"
    - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR
      value: "1"
    # OpenTelemetry wiring for the NIM runtime.
    - name: NIM_ENABLE_OTEL
      value: "true"
    - name: NIM_OTEL_SERVICE_NAME
      value: "page-elements"
    - name: NIM_OTEL_TRACES_EXPORTER
      value: "otlp"
    - name: NIM_OTEL_METRICS_EXPORTER
      value: "console"
    - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT
      value: "http://otel-collector:4318"
    # Triton OpenTelemetry Settings
    - name: TRITON_OTEL_URL
      value: "http://otel-collector:4318/v1/traces"
    - name: TRITON_OTEL_RATE
      value: "1"
## @param graphic_elements [object] Configuration for Graphic Elements NIM
## @param graphic_elements.enabled [bool] Enable the Graphic Elements NIM service
## @param graphic_elements.image.* [various] Image configuration for the NIM
## @param graphic_elements.authSecret [string] Authentication secret name
## @param graphic_elements.storage.* [various] Storage/PVC configuration for the NIM
## @param graphic_elements.replicas [int] Number of replicas to deploy
## @param graphic_elements.resources [object] Resource requests/limits for the deployment
## @param graphic_elements.expose.service.* [object] Service port configuration
## @param graphic_elements.env [array] Additional environment variables for the container
graphic_elements:
  enabled: true
  image:
    repository: nvcr.io/nim/nvidia/nemotron-graphic-elements-v1
    tag: 1.8.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # Per-service model cache PVC.
  storage:
    pvc:
      create: true
      size: "25Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
    - name: NIM_TRITON_RATE_LIMIT
      value: "3"
    - name: NIM_TRITON_MAX_BATCH_SIZE
      value: "32"
    - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
      value: "2048"
    - name: OMP_NUM_THREADS
      value: "1"
## @param table_structure [object] Configuration for Table Structure NIM
## @param table_structure.enabled [bool] Enable the Table Structure NIM service
## @param table_structure.image.* [various] Image config for the Table Structure model
## @param table_structure.authSecret [string] Authentication secret
## @param table_structure.storage.* [various] PVC/storage config
## @param table_structure.replicas [int] Number of replicas
## @param table_structure.resources [object] Resource limits/requests
## @param table_structure.expose.service.* [object] Ports to expose
## @param table_structure.env [array] Additional environment variables
table_structure:
  enabled: true
  image:
    repository: nvcr.io/nim/nvidia/nemotron-table-structure-v1
    tag: 1.8.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # Per-service model cache PVC.
  storage:
    pvc:
      create: true
      size: "25Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
    - name: NIM_TRITON_RATE_LIMIT
      value: "3"
    - name: NIM_TRITON_MAX_BATCH_SIZE
      value: "32"
    - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
      value: "2048"
    - name: OMP_NUM_THREADS
      value: "1"
## @param embedqa [object] Configuration for EmbedQA NIM
## @param embedqa.enabled [bool] Enable the EmbedQA service
## @param embedqa.image.* [various] EmbedQA NIM image configuration
## @param embedqa.authSecret [string] Authentication secret name
## @param embedqa.storage.* [object] PVC configuration
## @param embedqa.replicas [int] Number of replicas
## @param embedqa.resources [object] Resource requests/limits
## @param embedqa.expose.* [object] Service endpoint configuration
## @param embedqa.env [array] Environment variables
embedqa:
  enabled: true
  image:
    repository: nvcr.io/nim/nvidia/llama-nemotron-embed-1b-v2
    tag: 1.13.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # Larger PVC (50Gi) than the page/table NIMs (25Gi).
  storage:
    pvc:
      create: true
      size: "50Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
    - name: OMP_NUM_THREADS
      value: "1"
    - name: NIM_TRITON_PERFORMANCE_MODE
      value: "throughput"
## @param ocr [object] Configuration for Nemotron OCR v1 NIM
## @param ocr.enabled [bool] Enable the Nemotron OCR v1 service
## @param ocr.image.* [various] Image settings for Nemotron OCR v1
## @param ocr.authSecret [string] Secret for authentication
## @param ocr.storage.* [object] Storage/PVC configuration
## @param ocr.replicas [int] Number of service replicas
## @param ocr.resources [object] Limits/requests for compute resources
## @param ocr.expose.* [object] Ports and service config
## @param ocr.env [array] Additional environment variables
ocr:
  enabled: true
  image:
    repository: nvcr.io/nim/nvidia/nemotron-ocr-v1
    tag: 1.3.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # Per-service model cache PVC.
  storage:
    pvc:
      create: true
      size: "25Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    # Note: OMP_NUM_THREADS is 8 here vs 1-2 for the other NIM services.
    - name: OMP_NUM_THREADS
      value: "8"
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
    - name: NIM_TRITON_MAX_BATCH_SIZE
      value: "32"
## @param rerankqa [object] Configuration for LLaMA-3.2 NV RerankQA 1B v2 NIM
## @param rerankqa.enabled [bool] Enable this NIM
## @param rerankqa.image.* [various] Image repository/tag for this NIM
## @param rerankqa.authSecret [string] Authentication secret for the NIM
## @param rerankqa.storage.* [various] Storage/PVC configuration
## @param rerankqa.replicas [int] Number of replicas
## @param rerankqa.resources [object] Limits/requests for resources
## @param rerankqa.expose.* [object] Port/service configuration
## @param rerankqa.env [array] Additional environment variables
# Disabled by default; enable to deploy the reranker NIM.
rerankqa:
  enabled: false
  image:
    repository: nvcr.io/nim/nvidia/llama-nemotron-rerank-1b-v2
    tag: 1.10.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # Larger PVC (50Gi) than the page/table NIMs (25Gi).
  storage:
    pvc:
      create: true
      size: "50Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
## @param nemotron_nano_12b_v2_vl [object] Configuration for Nemotron Nano 12B v2 VL NIM
## @param nemotron_nano_12b_v2_vl.enabled [bool] Enable this NIM
## @param nemotron_nano_12b_v2_vl.image.repository [string] Image repository for this NIM
## @param nemotron_nano_12b_v2_vl.image.tag [string] Image tag or version
## @param nemotron_nano_12b_v2_vl.image.pullPolicy [string] Image pull policy
## @param nemotron_nano_12b_v2_vl.image.pullSecrets [array] List of image pull secrets
## @param nemotron_nano_12b_v2_vl.authSecret [string] Authentication secret for the NIM
## @param nemotron_nano_12b_v2_vl.storage.pvc.create [bool] Whether to create a persistent volume claim
## @param nemotron_nano_12b_v2_vl.storage.pvc.size [string] Size of the persistent volume claim (e.g., "300Gi")
## @param nemotron_nano_12b_v2_vl.storage.pvc.volumeAccessMode [string] Volume access mode for the PVC
## @param nemotron_nano_12b_v2_vl.replicas [int] Number of replicas
## @param nemotron_nano_12b_v2_vl.resources.limits.nvidia.com/gpu [int] Number of GPUs to allocate
## @param nemotron_nano_12b_v2_vl.expose.service.type [string] Kubernetes service type (e.g., "ClusterIP")
## @param nemotron_nano_12b_v2_vl.expose.service.port [int] HTTP service port
## @param nemotron_nano_12b_v2_vl.expose.service.grpcPort [int] gRPC service port
# Disabled by default; enable to deploy the vision-language NIM.
nemotron_nano_12b_v2_vl:
  enabled: false
  image:
    repository: nvcr.io/nim/nvidia/nemotron-nano-12b-v2-vl
    tag: 1.5.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # 300Gi PVC — much larger than the other NIMs' caches.
  storage:
    pvc:
      create: true
      size: "300Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
## @param nemotron_parse [object] Configuration for Nemotron Parse NIM
## @param nemotron_parse.enabled [bool] Enable or disable the Nemotron Parse NIM deployment.
## @param nemotron_parse.image.repository [string] Container image repository for Nemotron Parse NIM.
## @param nemotron_parse.image.tag [string] Container image tag (version) for Nemotron Parse NIM.
## @param nemotron_parse.image.pullPolicy [string] Image pull policy (e.g., Always, IfNotPresent).
## @param nemotron_parse.image.pullSecrets [array] List of Kubernetes secrets to use for pulling images.
## @param nemotron_parse.authSecret [string] Name of the Kubernetes secret containing authentication credentials for the NIM model registry.
## @param nemotron_parse.storage.pvc.create [bool] Whether to create a PersistentVolumeClaim for model or cache storage.
## @param nemotron_parse.storage.pvc.size [string] Size of the PersistentVolumeClaim (e.g., "100Gi").
## @param nemotron_parse.storage.pvc.volumeAccessMode [string] Access mode for the PVC (e.g., "ReadWriteOnce").
## @param nemotron_parse.replicas [int] Number of Pod replicas for high availability.
## @param nemotron_parse.resources.limits.nvidia.com/gpu [int] Number of GPUs to allocate to this NIM Pod.
## @param nemotron_parse.expose.service.type [string] Type of Kubernetes Service to expose the NIM (e.g., "ClusterIP", "NodePort").
## @param nemotron_parse.expose.service.port [int] HTTP port number for the service.
## @param nemotron_parse.expose.service.grpcPort [int] gRPC port number for the service.
## @param nemotron_parse.env [array] Additional environment variables for the Nemotron Parse container.
# Disabled by default; enable to deploy the Nemotron Parse NIM.
nemotron_parse:
  enabled: false
  image:
    repository: nvcr.io/nim/nvidia/nemotron-parse
    tag: 1.5.0
    pullPolicy: IfNotPresent
    pullSecrets:
      - ngc-secret
  authSecret: ngc-api
  # 100Gi model cache PVC.
  storage:
    pvc:
      create: true
      size: "100Gi"
      volumeAccessMode: "ReadWriteOnce"
  replicas: 1
  # Each replica requests one GPU.
  resources:
    limits:
      nvidia.com/gpu: 1
  # HTTP (8000) and gRPC (8001) on a cluster-internal service.
  expose:
    service:
      type: ClusterIP
      port: 8000
      grpcPort: 8001
  env:
    - name: NIM_HTTP_API_PORT
      value: "8000"
    - name: NIM_TRITON_LOG_VERBOSE
      value: "1"
## @section Audio NIM Configuration