diff --git a/api/apps/v1alpha1/common_types.go b/api/apps/v1alpha1/common_types.go index 8d41d2b45..43f61da40 100644 --- a/api/apps/v1alpha1/common_types.go +++ b/api/apps/v1alpha1/common_types.go @@ -218,6 +218,17 @@ type NGCSecret struct { Key string `json:"key"` } +// HFSecret represents the secret and key details for HuggingFace. +type HFSecret struct { + // Name of the Kubernetes secret containing the HF_TOKEN key + // +kubebuilder:validation:MinLength=1 + Name string `json:"name"` + + // Key in the secret containing the actual token value + // +kubebuilder:default:="HF_TOKEN" + Key string `json:"key"` +} + // PersistentVolumeClaim defines the attributes of PVC. // +kubebuilder:validation:XValidation:rule="!has(self.create) || !self.create || (has(self.size) && self.size != \"\")", message="size is required for pvc creation" // +kubebuilder:validation:XValidation:rule="!has(self.create) || !self.create || (has(self.volumeAccessMode) && self.volumeAccessMode != \"\")", message="volumeAccessMode is required for pvc creation" diff --git a/api/apps/v1alpha1/nemo_customizer_types.go b/api/apps/v1alpha1/nemo_customizer_types.go index a2fc3beea..c7f7fcc34 100644 --- a/api/apps/v1alpha1/nemo_customizer_types.go +++ b/api/apps/v1alpha1/nemo_customizer_types.go @@ -192,7 +192,10 @@ type ModelDownloadJobsConfig struct { ImagePullPolicy string `json:"imagePullPolicy,omitempty"` // NGCSecret is the secret containing the NGC API key - NGCSecret NGCSecret `json:"ngcAPISecret"` + NGCSecret NGCSecret `json:"ngcAPISecret,omitempty"` + + // HFSecret is the secret containing the HF_TOKEN key + HFSecret HFSecret `json:"hfSecret,omitempty"` // Optional security context for the job pods SecurityContext *corev1.PodSecurityContext `json:"securityContext,omitempty"` diff --git a/api/apps/v1alpha1/nemo_evaluator_types.go b/api/apps/v1alpha1/nemo_evaluator_types.go index 8c62fe458..8e6156449 100644 --- a/api/apps/v1alpha1/nemo_evaluator_types.go +++ b/api/apps/v1alpha1/nemo_evaluator_types.go @@ -119,21 +119,17 @@ type NemoEvaluatorSpec struct { EvaluationImages EvaluationImages `json:"evaluationImages"` } +// EvaluationImages for different evaluation targets. type EvaluationImages struct { - // +kubebuilder:validation:MinLength=1 - BigcodeEvalHarness string `json:"bigcodeEvalHarness"` - // +kubebuilder:validation:MinLength=1 - LmEvalHarness string `json:"lmEvalHarness"` - // +kubebuilder:validation:MinLength=1 - SimilarityMetrics string `json:"similarityMetrics"` - // +kubebuilder:validation:MinLength=1 - LlmAsJudge string `json:"llmAsJudge"` - // +kubebuilder:validation:MinLength=1 - MtBench string `json:"mtBench"` - // +kubebuilder:validation:MinLength=1 - Retriever string `json:"retriever"` - // +kubebuilder:validation:MinLength=1 - Rag string `json:"rag"` + BigcodeEvalHarness string `json:"bigcodeEvalHarness,omitempty"` + LmEvalHarness string `json:"lmEvalHarness,omitempty"` + SimilarityMetrics string `json:"similarityMetrics,omitempty"` + LlmAsJudge string `json:"llmAsJudge,omitempty"` + MtBench string `json:"mtBench,omitempty"` + Retriever string `json:"retriever,omitempty"` + Rag string `json:"rag,omitempty"` + BFCL string `json:"bfcl,omitempty"` + AgenticEval string `json:"agenticEval,omitempty"` } // NemoEvaluatorStatus defines the observed state of NemoEvaluator.
@@ -197,6 +193,14 @@ func (ei EvaluationImages) GetEvaluationImageEnv() []corev1.EnvVar { Name: "RAG", Value: ei.Rag, }, + { + Name: "BFCL", + Value: ei.BFCL, + }, + { + Name: "AGENTIC_EVAL", + Value: ei.AgenticEval, + }, } } diff --git a/api/apps/v1alpha1/zz_generated.deepcopy.go b/api/apps/v1alpha1/zz_generated.deepcopy.go index b2e99a200..4403479ae 100644 --- a/api/apps/v1alpha1/zz_generated.deepcopy.go +++ b/api/apps/v1alpha1/zz_generated.deepcopy.go @@ -366,6 +366,21 @@ func (in *GuardrailConfig) DeepCopy() *GuardrailConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HFSecret) DeepCopyInto(out *HFSecret) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HFSecret. +func (in *HFSecret) DeepCopy() *HFSecret { + if in == nil { + return nil + } + out := new(HFSecret) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HorizontalPodAutoscalerSpec) DeepCopyInto(out *HorizontalPodAutoscalerSpec) { *out = *in @@ -576,6 +591,7 @@ func (in *Metrics) DeepCopy() *Metrics { func (in *ModelDownloadJobsConfig) DeepCopyInto(out *ModelDownloadJobsConfig) { *out = *in out.NGCSecret = in.NGCSecret + out.HFSecret = in.HFSecret if in.SecurityContext != nil { in, out := &in.SecurityContext, &out.SecurityContext *out = new(corev1.PodSecurityContext) diff --git a/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml b/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml index 11c9a83fa..7ca7a3636 100644 --- a/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml +++ b/bundle/manifests/apps.nvidia.com_nemocustomizers.yaml @@ -419,6 +419,22 @@ spec: modelDownloadJobs: description: Model download job configuration properties: + hfSecret: + description: HFSecret is the secret containing the HF_TOKEN key + properties: + key: + default: HF_TOKEN + description: Key in the secret containing the actual token value + type: string + name: + description: Name of the Kubernetes secret containing the HF_TOKEN + key + minLength: 1 + type: string + required: + - key + - name + type: object image: description: Docker image used for model download jobs minLength: 1 @@ -664,7 +680,6 @@ spec: type: integer required: - image - - ngcAPISecret - pollIntervalSeconds - ttlSecondsAfterFinished type: object diff --git a/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml b/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml index 6d7451d6d..62c7e436d 100644 --- a/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml +++ b/bundle/manifests/apps.nvidia.com_nemoevaluators.yaml @@ -291,35 +291,24 @@ spec: description: EvaluationImages defines the external images used for evaluation properties: + agenticEval: + type: string + bfcl: + type: string bigcodeEvalHarness: - minLength: 1 type: string llmAsJudge: - minLength: 1 type: string lmEvalHarness: - minLength: 1 type: string mtBench: - minLength: 1 type: string rag: - minLength: 1 type: string retriever: - minLength: 1 type: string similarityMetrics: - minLength: 1 type: string - required: - - bigcodeEvalHarness - - llmAsJudge - - lmEvalHarness - - mtBench - - rag - - retriever - - similarityMetrics type: object expose: description: ExposeV1 defines attributes to expose the service.
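A minimal sketch of how the new optional `hfSecret` field could be set on a `NemoCustomizer` spec; the secret name `hf-api-secret` is an assumption (any secret carrying the token works), and `key` falls back to the CRD's `HF_TOKEN` default when omitted:

```yaml
# Hypothetical secret holding a HuggingFace token, e.g. created with:
#   kubectl create secret generic hf-api-secret --from-literal=HF_TOKEN=<token> -n nemo
modelDownloadJobs:
  image: "nvcr.io/nvidia/nemo-microservices/customizer-api:25.06"
  hfSecret:
    name: hf-api-secret  # assumed secret name
    key: HF_TOKEN        # optional; defaults to HF_TOKEN per the CRD
  ttlSecondsAfterFinished: 600
  pollIntervalSeconds: 15
```

Because `ngcAPISecret` is now `omitempty` and dropped from the CRD's `required` list, a download job can carry an NGC secret, an HF secret, or both.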
diff --git a/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml b/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml index 11c9a83fa..7ca7a3636 100644 --- a/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml +++ b/config/crd/bases/apps.nvidia.com_nemocustomizers.yaml @@ -419,6 +419,22 @@ spec: modelDownloadJobs: description: Model download job configuration properties: + hfSecret: + description: HFSecret is the secret containing the HF_TOKEN key + properties: + key: + default: HF_TOKEN + description: Key in the secret containing the actual token value + type: string + name: + description: Name of the Kubernetes secret containing the HF_TOKEN + key + minLength: 1 + type: string + required: + - key + - name + type: object image: description: Docker image used for model download jobs minLength: 1 @@ -664,7 +680,6 @@ spec: type: integer required: - image - - ngcAPISecret - pollIntervalSeconds - ttlSecondsAfterFinished type: object diff --git a/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml b/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml index 6d7451d6d..62c7e436d 100644 --- a/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml +++ b/config/crd/bases/apps.nvidia.com_nemoevaluators.yaml @@ -291,35 +291,24 @@ spec: description: EvaluationImages defines the external images used for evaluation properties: + agenticEval: + type: string + bfcl: + type: string bigcodeEvalHarness: - minLength: 1 type: string llmAsJudge: - minLength: 1 type: string lmEvalHarness: - minLength: 1 type: string mtBench: - minLength: 1 type: string rag: - minLength: 1 type: string retriever: - minLength: 1 type: string similarityMetrics: - minLength: 1 type: string - required: - - bigcodeEvalHarness - - llmAsJudge - - lmEvalHarness - - mtBench - - rag - - retriever - - similarityMetrics type: object expose: description: ExposeV1 defines attributes to expose the service. diff --git a/config/samples/nemo/25.04/README.md b/config/samples/nemo/25.04/README.md new file mode 100644 index 000000000..7707b620f --- /dev/null +++ b/config/samples/nemo/25.04/README.md @@ -0,0 +1,36 @@ +# NeMo Custom Resources + +These CRs are designed to deploy NeMo microservices using the NIM Operator. + +## Compatible NIM Operator Version + +- **NIM Operator v2.0.0** + +> Using these CRs with any other version may lead to validation or runtime errors. + +## Notes + +- The CR schema and fields in this version match the capabilities of NIM Operator v2.0.0. + +## Upgrade Notes + +If upgrading to this NeMo service version (`25.04`) from a previous one using the existing operator version: +- Check for renamed or deprecated fields. +- Review updated model config parameters.
+- Revalidate against the new CR using: + + ```bash + kubectl apply --dry-run=server -f apps_v1alpha1_nemodatastore.yaml \ + -f apps_v1alpha1_nemocustomizer.yaml \ + -f apps_v1alpha1_nemoentitystore.yaml \ + -f apps_v1alpha1_nemoguardrails.yaml \ + -f apps_v1alpha1_nemoevaluator.yaml + ``` + + ```text + nemodatastore.apps.nvidia.com/nemodatastore-sample created (server dry run) + nemocustomizer.apps.nvidia.com/nemocustomizer-sample created (server dry run) + nemoentitystore.apps.nvidia.com/nemoentitystore-sample created (server dry run) + nemoguardrail.apps.nvidia.com/nemoguardrails-sample configured (server dry run) + nemoevaluator.apps.nvidia.com/nemoevaluator-sample created (server dry run) + ``` diff --git a/config/samples/nemo/25.04/apps_v1alpha1_nemocustomizer.yaml b/config/samples/nemo/25.04/apps_v1alpha1_nemocustomizer.yaml new file mode 100644 index 000000000..f965f7ea3 --- /dev/null +++ b/config/samples/nemo/25.04/apps_v1alpha1_nemocustomizer.yaml @@ -0,0 +1,122 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoCustomizer +metadata: + name: nemocustomizer-sample + namespace: nemo +spec: + # Scheduler configuration for training jobs (default: volcano) + scheduler: + type: "volcano" + # Weights & Biases configuration for experiment tracking + wandb: + secretName: wandb-secret # Kubernetes secret that stores WANDB_API_KEY and optionally an encryption key + apiKeyKey: apiKey # Key in the secret that holds the W&B API key + encryptionKey: encryptionKey # Key in the secret that holds the optional encryption key + # OpenTelemetry tracing configuration + otel: + enabled: true + exporterOtlpEndpoint: http://customizer-otel-opentelemetry-collector.nemo.svc.cluster.local:4317 + # PostgreSQL database connection configuration + databaseConfig: + credentials: + user: ncsuser # Database username + secretName: customizer-pg-existing-secret # Secret containing password + passwordKey: password # Key inside secret that contains the password + host: customizer-pg-postgresql.nemo.svc.cluster.local + port: 5432 + databaseName: ncsdb + # Customizer API service exposure settings + expose: + service: + type: ClusterIP + port: 8000 + # Global image pull settings used in various subcomponents + image: + repository: nvcr.io/nvidia/nemo-microservices/customizer-api + tag: "25.04" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + # URL to the NeMo Entity Store microservice + entitystore: + endpoint: http://nemoentitystore-sample.nemo.svc.cluster.local:8000 + # URL to the NeMo Data Store microservice + datastore: + endpoint: http://nemodatastore-sample.nemo.svc.cluster.local:8000 + # URL for MLflow tracking server + mlflow: + endpoint: http://mlflow-tracking.nemo.svc.cluster.local:80 + # Configuration for the data store CLI tools + nemoDatastoreTools: + image: nvcr.io/nvidia/nemo-microservices/nds-v2-huggingface-cli:25.04 + # Configuration for model download jobs + modelDownloadJobs: + image: "nvcr.io/nvidia/nemo-microservices/customizer-api:25.04" + ngcAPISecret: + # Secret that stores NGC API key + name: ngc-api-secret + # Key inside secret + key: "NGC_API_KEY" + securityContext: + fsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + # Time (in seconds) to retain job after completion + ttlSecondsAfterFinished: 600 + # Polling frequency to check job status + pollIntervalSeconds: 15 + # Name of the ConfigMap containing model definitions + modelConfig: + name: nemo-model-config + # Training configuration + trainingConfig: + configMap: + # Optional: Additional configuration to merge
into training config + name: nemo-training-config + # PVC where model artifacts are cached or used during training + modelPVC: + create: true + name: finetuning-ms-models-pvc + # StorageClass for the PVC (can be empty to use default) + storageClass: "" + volumeAccessMode: ReadWriteOnce + size: 50Gi + # Workspace PVC automatically created per job + workspacePVC: + storageClass: "local-path" + volumeAccessMode: ReadWriteOnce + size: 10Gi + # Mount path for workspace inside container + mountPath: /pvc/workspace + image: + repository: nvcr.io/nvidia/nemo-microservices/customizer + tag: "25.04" + env: + - name: LOG_LEVEL + value: INFO + # Multi-node networking environment variables for training (CSPs) + networkConfig: + - name: NCCL_IB_SL + value: "0" + - name: NCCL_IB_TC + value: "41" + - name: NCCL_IB_QPS_PER_CONNECTION + value: "4" + - name: UCX_TLS + value: TCP + - name: UCX_NET_DEVICES + value: eth0 + - name: HCOLL_ENABLE_MCAST_ALL + value: "0" + - name: NCCL_IB_GID_INDEX + value: "3" + # TTL for training job after it completes + ttlSecondsAfterFinished: 3600 + # Timeout duration (in seconds) for training job + timeout: 3600 + # Node tolerations + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" diff --git a/config/samples/nemo/25.04/apps_v1alpha1_nemodatastore.yaml b/config/samples/nemo/25.04/apps_v1alpha1_nemodatastore.yaml new file mode 100644 index 000000000..5b74e9be0 --- /dev/null +++ b/config/samples/nemo/25.04/apps_v1alpha1_nemodatastore.yaml @@ -0,0 +1,44 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoDatastore +metadata: + name: nemodatastore-sample + namespace: nemo +spec: + secrets: + datastoreConfigSecret: "nemo-ms-nemo-datastore" + datastoreInitSecret: "nemo-ms-nemo-datastore-init" + datastoreInlineConfigSecret: "nemo-ms-nemo-datastore-inline-config" + giteaAdminSecret: "gitea-admin-credentials" + lfsJwtSecret: "nemo-ms-nemo-datastore--lfs-jwt" + databaseConfig: + credentials: + user: ndsuser + secretName: datastore-pg-existing-secret + passwordKey: password + host: datastore-pg-postgresql.nemo.svc.cluster.local + port: 5432 + databaseName: ndsdb + pvc: + name: "pvc-shared-data" + create: true + storageClass: "" + volumeAccessMode: ReadWriteOnce + size: "10Gi" + expose: + service: + type: ClusterIP + port: 8000 + image: + repository: nvcr.io/nvidia/nemo-microservices/datastore + tag: "25.04" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + replicas: 1 + resources: + requests: + memory: "256Mi" + cpu: "500m" + limits: + memory: "512Mi" + cpu: "1" diff --git a/config/samples/nemo/25.04/apps_v1alpha1_nemoentitystore.yaml b/config/samples/nemo/25.04/apps_v1alpha1_nemoentitystore.yaml new file mode 100644 index 000000000..a938c13f1 --- /dev/null +++ b/config/samples/nemo/25.04/apps_v1alpha1_nemoentitystore.yaml @@ -0,0 +1,27 @@ +--- +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoEntitystore +metadata: + name: nemoentitystore-sample + namespace: nemo +spec: + image: + repository: nvcr.io/nvidia/nemo-microservices/entity-store + tag: "25.04" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + expose: + service: + type: ClusterIP + port: 8000 + databaseConfig: + databaseName: nesdb + host: entity-store-pg-postgresql.nemo.svc.cluster.local + port: 5432 + credentials: + user: nesuser + secretName: entity-store-pg-existing-secret + passwordKey: password + datastore: + endpoint: http://nemodatastore-sample.nemo.svc.cluster.local:8000 diff --git a/config/samples/nemo/25.04/apps_v1alpha1_nemoevaluator.yaml 
b/config/samples/nemo/25.04/apps_v1alpha1_nemoevaluator.yaml new file mode 100644 index 000000000..f0c043db4 --- /dev/null +++ b/config/samples/nemo/25.04/apps_v1alpha1_nemoevaluator.yaml @@ -0,0 +1,45 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoEvaluator +metadata: + name: nemoevaluator-sample + namespace: nemo +spec: + evaluationImages: + bigcodeEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-bigcode:0.12.13" + lmEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-lm-eval-harness:0.12.15" + similarityMetrics: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-custom-eval:0.12.13" + llmAsJudge: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.15" + mtBench: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.15" + retriever: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-retriever:0.12.13" + rag: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-rag:0.12.13" + image: + repository: nvcr.io/nvidia/nemo-microservices/evaluator + tag: "25.04" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + expose: + service: + type: ClusterIP + port: 8000 + argoWorkflows: + endpoint: https://argo-workflows-server.nemo.svc.cluster.local:2746 + serviceAccount: argo-workflows-executor + vectorDB: + endpoint: http://milvus.nemo.svc.cluster.local:19530 + datastore: + endpoint: http://nemodatastore-sample.nemo.svc.cluster.local:8000/v1/hf + entitystore: + endpoint: http://nemoentitystore-sample.nemo.svc.cluster.local:8000 + databaseConfig: + host: evaluator-pg-postgresql.nemo.svc.cluster.local + port: 5432 + databaseName: evaldb + credentials: + user: evaluser + secretName: evaluator-pg-existing-secret + passwordKey: password + otel: + enabled: true + exporterOtlpEndpoint: http://evaluator-otel-opentelemetry-collector.nemo.svc.cluster.local:4317 + replicas: 1 \ No newline at end of file diff --git a/config/samples/nemo/25.04/apps_v1alpha1_nemoguardrails.yaml b/config/samples/nemo/25.04/apps_v1alpha1_nemoguardrails.yaml new file mode 100644 index 000000000..58d6175d5 --- /dev/null +++ b/config/samples/nemo/25.04/apps_v1alpha1_nemoguardrails.yaml @@ -0,0 +1,33 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NemoGuardrail +metadata: + name: nemoguardrails-sample + namespace: nemo +spec: + # required if a NIM endpoint is hosted by NVIDIA + configStore: + pvc: + name: "pvc-guardrail-config" + create: true + storageClass: "" + volumeAccessMode: ReadWriteOnce + size: "1Gi" + nimEndpoint: + baseURL: "http://meta-llama3-1b-instruct.nemo.svc.cluster.local:8000/v1" + expose: + service: + type: ClusterIP + port: 8000 + image: + repository: nvcr.io/nvidia/nemo-microservices/guardrails + tag: "25.04" + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + metrics: + serviceMonitor: {} + replicas: 1 + resources: + limits: + cpu: "1" + ephemeral-storage: 10Gi diff --git a/config/samples/nemo/25.04/llama3-1b-pipeline.yaml b/config/samples/nemo/25.04/llama3-1b-pipeline.yaml new file mode 100644 index 000000000..f07d1a345 --- /dev/null +++ b/config/samples/nemo/25.04/llama3-1b-pipeline.yaml @@ -0,0 +1,64 @@ +--- +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMCache +metadata: + name: meta-llama3-1b-instruct + namespace: nemo +spec: + source: + ngc: + modelPuller: nvcr.io/nim/meta/llama-3.2-1b-instruct:1.8.3 + pullSecret: ngc-secret + authSecret: ngc-api-secret + model: + engine: tensorrt_llm + tensorParallelism: "1" + storage: + pvc: + create: true + storageClass: "" + size: "50Gi" + volumeAccessMode: 
ReadWriteOnce + +--- +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMPipeline +metadata: + name: llama3-1b-pipeline + namespace: nemo +spec: + services: + - name: meta-llama3-1b-instruct + enabled: true + spec: + env: + - name: NIM_PEFT_SOURCE + value: http://nemoentitystore-sample.nemo.svc.cluster.local:8000 + - name: NIM_PEFT_REFRESH_INTERVAL + value: "180" + - name: NIM_MAX_CPU_LORAS + value: "16" + - name: NIM_MAX_GPU_LORAS + value: "8" + - name: NIM_GUIDED_DECODING_BACKEND + value: fast_outlines + image: + repository: nvcr.io/nim/meta/llama-3.2-1b-instruct + tag: 1.8.3 + pullPolicy: IfNotPresent + pullSecrets: + - ngc-secret + authSecret: ngc-api-secret + storage: + nimCache: + name: meta-llama3-1b-instruct + profile: '' + replicas: 1 + resources: + limits: + nvidia.com/gpu: 1 + expose: + service: + type: ClusterIP + port: 8000 + diff --git a/config/samples/nemo/25.04/nemocustomizer_config.yaml b/config/samples/nemo/25.04/nemocustomizer_config.yaml new file mode 100644 index 000000000..d9dc9969a --- /dev/null +++ b/config/samples/nemo/25.04/nemocustomizer_config.yaml @@ -0,0 +1,247 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: nemo-training-config + namespace: nemo +data: + training: | + # Optional additional configuration for training jobs + container_defaults: + imagePullPolicy: IfNotPresent + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: nemo-model-config + namespace: nemo +data: + models: | + # -- Llama 3.2 3B Instruct model configuration. + # @default -- This object has the following default values for the Llama 3.2 3B Instruct model. + meta/llama-3.2-3b-instruct: + # -- Whether to enable the model. + enabled: false + # -- NGC model URI. + model_uri: ngc://nvidia/nemo/llama-3_2-3b-instruct:2.0 + # -- Path where model files are stored. + model_path: llama32_3b-instruct + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 3000000000 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Llama 3.2 1B model configuration. + # @default -- This object has the following default values for the Llama 3.2 1B model. + meta/llama-3.2-1b: + # -- Whether to enable the model. + enabled: false + # -- NGC model URI for Llama 3.2 1B model. + model_uri: ngc://nvidia/nemo/llama-3_2-1b:2.0 + # -- Path where model files are stored. + model_path: llama32_1b + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + - training_type: sft + finetuning_type: all_weights + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 1000000000 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Llama 3.2 1B Instruct model configuration. + # @default -- This object has the following default values for the Llama 3.2 1B Instruct model. + meta/llama-3.2-1b-instruct: + # -- Whether to enable the model. 
+ enabled: true + # -- NGC model URI for Llama 3.2 1B Instruct model. + model_uri: ngc://nvidia/nemo/llama-3_2-1b-instruct:2.0 + # -- Path where model files are stored. + model_path: llama32_1b-instruct + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + - training_type: sft + finetuning_type: all_weights + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 1000000000 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Llama 3 70B Instruct model configuration. + # @default -- This object has the following default values for the Llama 3 70B Instruct model. + meta/llama3-70b-instruct: + # -- Whether to enable the model. + enabled: false + # -- NGC model URI for Llama 3 70B Instruct model. + model_uri: ngc://nvidia/nemo/llama-3-70b-instruct-nemo:2.0 + # -- Path where model files are stored. + model_path: llama-3-70b-bf16 + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 4 + num_nodes: 1 + tensor_parallel_size: 4 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 70000000000 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Llama 3.1 8B Instruct model configuration. + # @default -- This object has the following default values for the Llama 3.1 8B Instruct model. + meta/llama-3.1-8b-instruct: + # -- Whether to enable the model. + enabled: true + # -- NGC model URI for Llama 3.1 8B Instruct model. + model_uri: ngc://nvidia/nemo/llama-3_1-8b-instruct-nemo:2.0 + # -- Path where model files are stored. + model_path: llama-3_1-8b-instruct_0_0_1 + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 1 + - training_type: sft + finetuning_type: all_weights + num_gpus: 8 + num_nodes: 1 + tensor_parallel_size: 4 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 8000000000 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Llama 3.1 70B Instruct model configuration. + # @default -- This object has the following default values for the Llama 3.1 70B Instruct model. + meta/llama-3.1-70b-instruct: + # -- Whether to enable the model. + enabled: false + # -- NGC model URI for Llama 3.1 70B Instruct model. + model_uri: ngc://nvidia/nemo/llama-3_1-70b-instruct-nemo:2.0 + # -- Path where model files are stored. + model_path: llama-3_1-70b-instruct_0_0_1 + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 4 + num_nodes: 1 + tensor_parallel_size: 4 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. 
+ max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 70000000000 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Phi-4 model configuration. + # @default -- This object has the following default values for the Phi-4. + microsoft/phi-4: + # -- Whether to enable the model. + enabled: false + # -- NGC model URI for Phi-4 model. + model_uri: ngc://nvidia/nemo/phi-4:1.0 + # -- Path where model files are stored. + model_path: phi-4 + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 1 + num_nodes: 1 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 14659507200 + # -- Model precision format. + precision: bf16 + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" + + # -- Llama 3.3 70B Instruct model configuration. + # @default -- This object has the following default values for the Llama 3.3 70B Instruct model. + meta/llama-3.3-70b-instruct: + # -- Whether to enable the model. + enabled: false + # -- NGC model URI for Llama 3.3 70B Instruct model. + model_uri: ngc://nvidia/nemo/llama-3_3-70b-instruct:2.0 + # -- Path where model files are stored. + model_path: llama-3_3-70b-instruct_0_0_1 + # -- Training options for different fine-tuning methods. + training_options: + - training_type: sft + finetuning_type: lora + num_gpus: 4 + num_nodes: 1 + tensor_parallel_size: 4 + # -- Micro batch size for training. + micro_batch_size: 1 + # -- Maximum sequence length for input tokens. + max_seq_length: 4096 + # -- Number of model parameters. + num_parameters: 70000000000 + # -- Model precision format. + precision: bf16-mixed + # -- Template for formatting prompts. + prompt_template: "{prompt} {completion}" diff --git a/config/samples/nemo/latest/README.md b/config/samples/nemo/latest/README.md index 7707b620f..1ddac3f34 100644 --- a/config/samples/nemo/latest/README.md +++ b/config/samples/nemo/latest/README.md @@ -4,17 +4,17 @@ These CRs are designed to deploy NeMo microservices using the NIM Operator. ## Compatible NIM Operator Version -- **NIM Operator v2.0.0** +- **NIM Operator v2.0.1** > Using these CRs with any other version may lead to validation or runtime errors. ## Notes -- The CR schema and fields in this version match the capabilities of NIM Operator v2.0.0. +- The CR schema and fields in this version match the capabilities of NIM Operator v2.0.1. ## Upgrade Notes -If upgrading from a previous NeMo service version (e.g., `25.04`) using the existing operator version: +If upgrading to this NeMo service version (`25.06`) from a previous one using the existing operator version: - Check for renamed or deprecated fields. - Review updated model config parameters.
- Revalidate against the new CR using: diff --git a/config/samples/nemo/latest/VERSION b/config/samples/nemo/latest/VERSION index e4aadacce..4bf503303 100644 --- a/config/samples/nemo/latest/VERSION +++ b/config/samples/nemo/latest/VERSION @@ -1 +1 @@ -25.04 \ No newline at end of file +25.06 \ No newline at end of file diff --git a/config/samples/nemo/latest/apps_v1alpha1_nemocustomizer.yaml b/config/samples/nemo/latest/apps_v1alpha1_nemocustomizer.yaml index f965f7ea3..169c1fb33 100644 --- a/config/samples/nemo/latest/apps_v1alpha1_nemocustomizer.yaml +++ b/config/samples/nemo/latest/apps_v1alpha1_nemocustomizer.yaml @@ -33,7 +33,7 @@ spec: # Global image pull settings used in various subcomponents image: repository: nvcr.io/nvidia/nemo-microservices/customizer-api - tag: "25.04" + tag: "25.06" pullPolicy: IfNotPresent pullSecrets: - ngc-secret @@ -48,10 +48,10 @@ spec: endpoint: http://mlflow-tracking.nemo.svc.cluster.local:80 # Configuration for the data store CLI tools nemoDatastoreTools: - image: nvcr.io/nvidia/nemo-microservices/nds-v2-huggingface-cli:25.04 + image: nvcr.io/nvidia/nemo-microservices/nds-v2-huggingface-cli:25.06 # Configuration for model download jobs modelDownloadJobs: - image: "nvcr.io/nvidia/nemo-microservices/customizer-api:25.04" + image: "nvcr.io/nvidia/nemo-microservices/customizer-api:25.06" ngcAPISecret: # Secret that stores NGC API key name: ngc-api-secret @@ -91,7 +91,7 @@ spec: mountPath: /pvc/workspace image: repository: nvcr.io/nvidia/nemo-microservices/customizer - tag: "25.04" + tag: "25.06" env: - name: LOG_LEVEL value: INFO diff --git a/config/samples/nemo/latest/apps_v1alpha1_nemodatastore.yaml b/config/samples/nemo/latest/apps_v1alpha1_nemodatastore.yaml index 5b74e9be0..7b95e4e17 100644 --- a/config/samples/nemo/latest/apps_v1alpha1_nemodatastore.yaml +++ b/config/samples/nemo/latest/apps_v1alpha1_nemodatastore.yaml @@ -30,7 +30,7 @@ spec: port: 8000 image: repository: nvcr.io/nvidia/nemo-microservices/datastore - tag: "25.04" + tag: "25.06" pullPolicy: IfNotPresent pullSecrets: - ngc-secret diff --git a/config/samples/nemo/latest/apps_v1alpha1_nemoentitystore.yaml b/config/samples/nemo/latest/apps_v1alpha1_nemoentitystore.yaml index a938c13f1..b00dc5466 100644 --- a/config/samples/nemo/latest/apps_v1alpha1_nemoentitystore.yaml +++ b/config/samples/nemo/latest/apps_v1alpha1_nemoentitystore.yaml @@ -7,7 +7,7 @@ metadata: spec: image: repository: nvcr.io/nvidia/nemo-microservices/entity-store - tag: "25.04" + tag: "25.06" pullPolicy: IfNotPresent pullSecrets: - ngc-secret diff --git a/config/samples/nemo/latest/apps_v1alpha1_nemoevaluator.yaml b/config/samples/nemo/latest/apps_v1alpha1_nemoevaluator.yaml index 0d88e4354..ee0867e73 100644 --- a/config/samples/nemo/latest/apps_v1alpha1_nemoevaluator.yaml +++ b/config/samples/nemo/latest/apps_v1alpha1_nemoevaluator.yaml @@ -5,16 +5,18 @@ metadata: namespace: nemo spec: evaluationImages: - bigcodeEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-bigcode:0.12.13" - lmEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-lm-eval-harness:0.12.15" - similarityMetrics: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-custom-eval:0.12.13" - llmAsJudge: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.15" - mtBench: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.15" - retriever: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-retriever:0.12.13" - rag: 
"nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-rag:0.12.13" + bigcodeEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-bigcode:0.12.21" + lmEvalHarness: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-lm-eval-harness:0.12.21" + similarityMetrics: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-custom-eval:0.12.21" + llmAsJudge: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.21" + mtBench: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-llm-as-a-judge:0.12.21" + retriever: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-retriever:0.12.21" + rag: "nvcr.io/nvidia/nemo-microservices/eval-tool-benchmark-rag:0.12.21" + bfcl: "nvcr.io/nvidia/nemo-microservices/eval-factory-benchmark-bfcl:25.6.1" + agenticEval: "nvcr.io/nvidia/nemo-microservices/eval-factory-benchmark-agentic-eval:25.6.1" image: repository: nvcr.io/nvidia/nemo-microservices/evaluator - tag: "25.04" + tag: "25.06" pullPolicy: IfNotPresent pullSecrets: - ngc-secret diff --git a/config/samples/nemo/latest/apps_v1alpha1_nemoguardrails.yaml b/config/samples/nemo/latest/apps_v1alpha1_nemoguardrails.yaml index 58d6175d5..31b6e8241 100644 --- a/config/samples/nemo/latest/apps_v1alpha1_nemoguardrails.yaml +++ b/config/samples/nemo/latest/apps_v1alpha1_nemoguardrails.yaml @@ -20,7 +20,7 @@ spec: port: 8000 image: repository: nvcr.io/nvidia/nemo-microservices/guardrails - tag: "25.04" + tag: "25.06" pullPolicy: IfNotPresent pullSecrets: - ngc-secret diff --git a/config/samples/nemo/latest/nemocustomizer_config.yaml b/config/samples/nemo/latest/nemocustomizer_config.yaml index d9dc9969a..acb50441d 100644 --- a/config/samples/nemo/latest/nemocustomizer_config.yaml +++ b/config/samples/nemo/latest/nemocustomizer_config.yaml @@ -17,231 +17,469 @@ metadata: name: nemo-model-config namespace: nemo data: - models: | - # -- Llama 3.2 3B Instruct model configuration. - # @default -- This object has the following default values for the Llama 3.2 3B Instruct model. - meta/llama-3.2-3b-instruct: - # -- Whether to enable the model. - enabled: false - # -- NGC model URI. - model_uri: ngc://nvidia/nemo/llama-3_2-3b-instruct:2.0 - # -- Path where model files are stored. - model_path: llama32_3b-instruct - # -- Training options for different fine-tuning methods. 
- training_options: - - training_type: sft - finetuning_type: lora + customizationTargets: | + overrideExistingTargets: true + targets: + meta/llama-3.1-8b-instruct@2.0: + base_model: meta/llama-3.1-8b-instruct + enabled: true + model_path: llama-3_1-8b-instruct_2_0 + model_uri: ngc://nvidia/nemo/llama-3_1-8b-instruct-nemo:2.0 + name: llama-3.1-8b-instruct@2.0 + namespace: meta + num_parameters: 8000000000 + precision: bf16-mixed + meta/llama-3.1-70b-instruct@2.0: + base_model: meta/llama-3.1-70b-instruct + enabled: false + model_path: llama-3_1-70b-instruct_2_0 + model_uri: ngc://nvidia/nemo/llama-3_1-70b-instruct-nemo:2.0 + name: llama-3.1-70b-instruct@2.0 + namespace: meta + num_parameters: 70000000000 + precision: bf16-mixed + meta/llama-3.2-1b-embedding@0.0.1: + base_model: meta/llama-3.2-1b-embedding + enabled: false + model_path: llama32_1b-embedding + model_uri: ngc://nvidia/nemo/llama-3_2-1b-embedding-base:0.0.1 + name: llama-3.2-1b-embedding@0.0.1 + namespace: meta + num_parameters: 1000000000 + precision: bf16-mixed + meta/llama-3.2-1b-instruct@2.0: + base_model: meta/llama-3.2-1b-instruct + enabled: true + model_path: llama32_1b-instruct_2_0 + model_uri: ngc://nvidia/nemo/llama-3_2-1b-instruct:2.0 + name: llama-3.2-1b-instruct@2.0 + namespace: meta + num_parameters: 1000000000 + precision: bf16-mixed + meta/llama-3.2-1b@2.0: + base_model: meta/llama-3.2-1b + enabled: false + model_path: llama32_1b_2_0 + model_uri: ngc://nvidia/nemo/llama-3_2-1b:2.0 + name: llama-3.2-1b@2.0 + namespace: meta + num_parameters: 1000000000 + precision: bf16-mixed + meta/llama-3.2-3b-instruct@2.0: + base_model: meta/llama-3.2-3b-instruct + enabled: false + model_path: llama32_3b-instruct_2_0 + model_uri: ngc://nvidia/nemo/llama-3_2-3b-instruct:2.0 + name: llama-3.2-3b-instruct@2.0 + namespace: meta + num_parameters: 3000000000 + precision: bf16-mixed + meta/llama-3.3-70b-instruct@2.0: + base_model: meta/llama-3.3-70b-instruct + enabled: false + model_path: llama-3_3-70b-instruct_2_0 + model_uri: ngc://nvidia/nemo/llama-3_3-70b-instruct:2.0 + name: llama-3.3-70b-instruct@2.0 + namespace: meta + num_parameters: 70000000000 + precision: bf16-mixed + meta/llama3-70b-instruct@2.0: + base_model: meta/llama3-70b-instruct + enabled: false + model_path: llama-3-70b-bf16_2_0 + model_uri: ngc://nvidia/nemo/llama-3-70b-instruct-nemo:2.0 + name: llama3-70b-instruct@2.0 + namespace: meta + num_parameters: 70000000000 + precision: bf16-mixed + microsoft/phi-4@1.0: + base_model: microsoft/phi-4 + enabled: false + model_path: phi-4_1_0 + model_uri: ngc://nvidia/nemo/phi-4:1.0 + name: phi-4@1.0 + namespace: microsoft + num_parameters: 14659507200 + precision: bf16 + version: "1.0" + nvidia/nemotron-nano-llama-3.1-8b@1.0: + base_model: nvidia/nemotron-nano-llama-3.1-8b + enabled: false + model_path: nemotron-nano-3_1-8b_0_0_1 + model_uri: ngc://nvidia/nemo/nemotron-nano-3_1-8b:0.0.1 + name: nemotron-nano-llama-3.1-8b@1.0 + namespace: nvidia + num_parameters: 8000000000 + precision: bf16-mixed + nvidia/nemotron-super-llama-3.3-49b@1.0: + base_model: nvidia/nemotron-super-llama-3.3-49b + enabled: false + model_path: nemotron-super-3_3-49b_v1 + model_uri: ngc://nvidia/nemo/nemotron-super-3_3-49b:v1 + name: nemotron-super-llama-3.3-49b@1.0 + namespace: nvidia + num_parameters: 49000000000 + precision: bf16-mixed + + customizationConfigTemplates: | + overrideExistingTemplates: true + templates: + meta/llama-3.1-8b-instruct@v1.0.0+A100: + max_seq_length: 4096 + name: llama-3.1-8b-instruct@v1.0.0+A100 + namespace: meta + 
prompt_template: '{prompt} {completion}' + target: meta/llama-3.1-8b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 8 + num_nodes: 1 + tensor_parallel_size: 4 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 1 + tensor_parallel_size: 4 + training_type: distillation + meta/llama-3.1-8b-instruct@v1.0.0+L40: + max_seq_length: 4096 + name: llama-3.1-8b-instruct@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.1-8b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 2 + tensor_parallel_size: 2 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 2 + tensor_parallel_size: 4 + training_type: sft + meta/llama-3.1-70b-instruct@v1.0.0+A100: + max_seq_length: 4096 + name: llama-3.1-70b-instruct@v1.0.0+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.1-70b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 1 + tensor_parallel_size: 4 + training_type: sft + meta/llama-3.1-70b-instruct@v1.0.0+L40: + max_seq_length: 4096 + name: llama-3.1-70b-instruct@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.1-70b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 2 + pipeline_parallel_size: 2 + tensor_parallel_size: 4 + training_type: sft + meta/llama-3.2-1b-embedding@0.0.1+A100: + max_seq_length: 2048 + name: llama-3.2-1b-embedding@0.0.1+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-1b-embedding@0.0.1 + training_options: + - finetuning_type: all_weights + micro_batch_size: 8 num_gpus: 1 num_nodes: 1 tensor_parallel_size: 1 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 3000000000 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. - prompt_template: "{prompt} {completion}" - - # -- Llama 3.2 1B model configuration. - # @default -- This object has the following default values for the Llama 3.2 1B model. - meta/llama-3.2-1b: - # -- Whether to enable the model. - enabled: false - # -- NGC model URI for Llama 3.2 1B model. - model_uri: ngc://nvidia/nemo/llama-3_2-1b:2.0 - # -- Path where model files are stored. - model_path: llama32_1b - # -- Training options for different fine-tuning methods. 
- training_options: - - training_type: sft - finetuning_type: lora + training_type: sft + meta/llama-3.2-1b-embedding@0.0.1+L40: + max_seq_length: 2048 + name: llama-3.2-1b-embedding@0.0.1+L40 + namespace: meta + target: meta/llama-3.2-1b-embedding@0.0.1 + training_options: + - finetuning_type: all_weights + micro_batch_size: 4 num_gpus: 1 num_nodes: 1 tensor_parallel_size: 1 - - training_type: sft - finetuning_type: all_weights + training_type: sft + meta/llama-3.2-1b-instruct@v1.0.0+A100: + max_seq_length: 4096 + name: llama-3.2-1b-instruct@v1.0.0+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-1b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 num_gpus: 1 num_nodes: 1 tensor_parallel_size: 1 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 1000000000 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. - prompt_template: "{prompt} {completion}" - - # -- Llama 3.2 1B Instruct model configuration. - # @default -- This object has the following default values for the Llama 3.2 1B Instruct model. - meta/llama-3.2-1b-instruct: - # -- Whether to enable the model. - enabled: true - # -- NGC model URI for Llama 3.2 1B Instruct model. - model_uri: ngc://nvidia/nemo/llama-3_2-1b-instruct:2.0 - # -- Path where model files are stored. - model_path: llama32_1b-instruct - # -- Training options for different fine-tuning methods. - training_options: - - training_type: sft - finetuning_type: lora + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 num_gpus: 1 num_nodes: 1 tensor_parallel_size: 1 - - training_type: sft - finetuning_type: all_weights + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 num_gpus: 1 num_nodes: 1 tensor_parallel_size: 1 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 1000000000 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. - prompt_template: "{prompt} {completion}" - - # -- Llama 3 70B Instruct model configuration. - # @default -- This object has the following default values for the Llama 3 70B Instruct model. - meta/llama3-70b-instruct: - # -- Whether to enable the model. - enabled: false - # -- NGC model URI for Llama 3 70B Instruct model. - model_uri: ngc://nvidia/nemo/llama-3-70b-instruct-nemo:2.0 - # -- Path where model files are stored. - model_path: llama-3-70b-bf16 - # -- Training options for different fine-tuning methods. - training_options: - - training_type: sft - finetuning_type: lora - num_gpus: 4 + training_type: distillation + meta/llama-3.2-1b-instruct@v1.0.0+L40: + max_seq_length: 4096 + name: llama-3.2-1b-instruct@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-1b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 num_nodes: 1 - tensor_parallel_size: 4 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 70000000000 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. 
- prompt_template: "{prompt} {completion}" - - # -- Llama 3.1 8B Instruct model configuration. - # @default -- This object has the following default values for the Llama 3.1 8B Instruct model. - meta/llama-3.1-8b-instruct: - # -- Whether to enable the model. - enabled: true - # -- NGC model URI for Llama 3.1 8B Instruct model. - model_uri: ngc://nvidia/nemo/llama-3_1-8b-instruct-nemo:2.0 - # -- Path where model files are stored. - model_path: llama-3_1-8b-instruct_0_0_1 - # -- Training options for different fine-tuning methods. - training_options: - - training_type: sft - finetuning_type: lora + tensor_parallel_size: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 num_gpus: 1 - - training_type: sft - finetuning_type: all_weights - num_gpus: 8 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + meta/llama-3.2-1b@v1.0.0+A100: + max_seq_length: 4096 + name: llama-3.2-1b@v1.0.0+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-1b@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: distillation + meta/llama-3.2-1b@v1.0.0+L40: + max_seq_length: 4096 + name: llama-3.2-1b@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-1b@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + meta/llama-3.2-3b-instruct@v1.0.0+A100: + max_seq_length: 4096 + name: llama-3.2-3b-instruct@v1.0.0+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-3b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 2 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: distillation + meta/llama-3.2-3b-instruct@v1.0.0+L40: + max_seq_length: 4096 + name: llama-3.2-3b-instruct@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.2-3b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + meta/llama-3.3-70b-instruct@v1.0.0+A100: + max_seq_length: 4096 + name: llama-3.3-70b-instruct@v1.0.0+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.3-70b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 4 num_nodes: 1 tensor_parallel_size: 4 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 8000000000 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. - prompt_template: "{prompt} {completion}" - - # -- Llama 3.1 70B Instruct model configuration. 
- # @default -- This object has the following default values for the Llama 3.1 70B Instruct model. - meta/llama-3.1-70b-instruct: - # -- Whether to enable the model. - enabled: false - # -- NGC model URI for Llama 3.1 70B Instruct model. - model_uri: ngc://nvidia/nemo/llama-3_1-70b-instruct-nemo:2.0 - # -- Path where model files are stored. - model_path: llama-3_1-70b-instruct_0_0_1 - # -- Training options for different fine-tuning methods. - training_options: - - training_type: sft - finetuning_type: lora + training_type: sft + meta/llama-3.3-70b-instruct@v1.0.0+L40: + max_seq_length: 4096 + name: llama-3.3-70b-instruct@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama-3.3-70b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 2 + pipeline_parallel_size: 2 + tensor_parallel_size: 4 + training_type: sft + meta/llama3-70b-instruct@v1.0.0+A100: + max_seq_length: 4096 + name: llama3-70b-instruct@v1.0.0+A100 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama3-70b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 num_gpus: 4 num_nodes: 1 tensor_parallel_size: 4 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 70000000000 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. - prompt_template: "{prompt} {completion}" - - # -- Phi-4 model configuration. - # @default -- This object has the following default values for the Phi-4. - microsoft/phi-4: - # -- Whether to enable the model. - enabled: false - # -- NGC model URI for Phi-4 model. - model_uri: ngc://nvidia/nemo/phi-4:1.0 - # -- Path where model files are stored. - model_path: phi-4 - # -- Training options for different fine-tuning methods. - training_options: - - training_type: sft - finetuning_type: lora + training_type: sft + meta/llama3-70b-instruct@v1.0.0+L40: + max_seq_length: 4096 + name: llama3-70b-instruct@v1.0.0+L40 + namespace: meta + prompt_template: '{prompt} {completion}' + target: meta/llama3-70b-instruct@2.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 2 + pipeline_parallel_size: 2 + tensor_parallel_size: 4 + training_type: sft + microsoft/phi-4@v1.0.0+A100: + max_seq_length: 4096 + name: phi-4@v1.0.0+A100 + namespace: microsoft + prompt_template: '{prompt} {completion}' + target: microsoft/phi-4@1.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 num_gpus: 1 num_nodes: 1 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 14659507200 - # -- Model precision format. - precision: bf16 - # -- Template for formatting prompts. - prompt_template: "{prompt} {completion}" - - # -- Llama 3.3 70B Instruct model configuration. - # @default -- This object has the following default values for the Llama 3.3 70B Instruct model. - meta/llama-3.3-70b-instruct: - # -- Whether to enable the model. - enabled: false - # -- NGC model URI for Llama 3.3 70B Instruct model. - model_uri: ngc://nvidia/nemo/llama-3_3-70b-instruct:2.0 - # -- Path where model files are stored. - model_path: llama-3_3-70b-instruct_0_0_1 - # -- Training options for different fine-tuning methods. 
- training_options: - - training_type: sft + training_type: sft + microsoft/phi-4@v1.0.0+L40: + max_seq_length: 4096 + name: phi-4@v1.0.0+L40 + namespace: microsoft + prompt_template: '{prompt} {completion}' + target: microsoft/phi-4@1.0 + training_options: + - data_parallel_size: 2 finetuning_type: lora + micro_batch_size: 1 + num_gpus: 2 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + nvidia/nemotron-nano-llama-3.1-8b@v1.0.0+A100: + max_seq_length: 4096 + name: nemotron-nano-llama-3.1-8b@v1.0.0+A100 + namespace: nvidia + prompt_template: '{prompt} {completion}' + target: nvidia/nemotron-nano-llama-3.1-8b@1.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 1 + num_nodes: 1 + tensor_parallel_size: 1 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 8 + num_nodes: 1 + tensor_parallel_size: 4 + training_type: sft + nvidia/nemotron-nano-llama-3.1-8b@v1.0.0+L40: + max_seq_length: 4096 + name: nemotron-nano-llama-3.1-8b@v1.0.0+L40 + namespace: nvidia + prompt_template: '{prompt} {completion}' + target: nvidia/nemotron-nano-llama-3.1-8b@1.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 2 + num_nodes: 1 + tensor_parallel_size: 2 + training_type: sft + - finetuning_type: all_weights + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 2 + pipeline_parallel_size: 2 + tensor_parallel_size: 4 + training_type: sft + nvidia/nemotron-super-llama-3.3-49b@v1.0.0+A100: + max_seq_length: 4096 + name: nemotron-super-llama-3.3-49b@v1.0.0+A100 + namespace: nvidia + prompt_template: '{prompt} {completion}' + target: nvidia/nemotron-super-llama-3.3-49b@1.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 num_gpus: 4 num_nodes: 1 tensor_parallel_size: 4 - # -- Micro batch size for training. - micro_batch_size: 1 - # -- Maximum sequence length for input tokens. - max_seq_length: 4096 - # -- Number of model parameters. - num_parameters: 70000000000 - # -- Model precision format. - precision: bf16-mixed - # -- Template for formatting prompts. 
- prompt_template: "{prompt} {completion}" + training_type: sft + nvidia/nemotron-super-llama-3.3-49b@v1.0.0+L40: + max_seq_length: 4096 + name: nemotron-super-llama-3.3-49b@v1.0.0+L40 + namespace: nvidia + prompt_template: '{prompt} {completion}' + target: nvidia/nemotron-super-llama-3.3-49b@1.0 + training_options: + - finetuning_type: lora + micro_batch_size: 1 + num_gpus: 4 + num_nodes: 2 + pipeline_parallel_size: 2 + tensor_parallel_size: 4 + training_type: sft diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml index 11c9a83fa..7ca7a3636 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemocustomizers.yaml @@ -419,6 +419,22 @@ spec: modelDownloadJobs: description: Model download job configuration properties: + hfSecret: + description: HFSecret is the secret containing the HF_TOKEN key + properties: + key: + default: HF_TOKEN + description: Key in the key containing the actual token value + type: string + name: + description: Name of the Kubernetes secret containing HF_TOKEN + key + minLength: 1 + type: string + required: + - key + - name + type: object image: description: Docker image used for model download jobs minLength: 1 @@ -664,7 +680,6 @@ spec: type: integer required: - image - - ngcAPISecret - pollIntervalSeconds - ttlSecondsAfterFinished type: object diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml index 6d7451d6d..62c7e436d 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nemoevaluators.yaml @@ -291,35 +291,24 @@ spec: description: EvaluationImages defines the external images used for evaluation properties: + agenticEval: + type: string + bfcl: + type: string bigcodeEvalHarness: - minLength: 1 type: string llmAsJudge: - minLength: 1 type: string lmEvalHarness: - minLength: 1 type: string mtBench: - minLength: 1 type: string rag: - minLength: 1 type: string retriever: - minLength: 1 type: string similarityMetrics: - minLength: 1 type: string - required: - - bigcodeEvalHarness - - llmAsJudge - - lmEvalHarness - - mtBench - - rag - - retriever - - similarityMetrics type: object expose: description: ExposeV1 defines attributes to expose the service. 
diff --git a/internal/controller/nemo_evaluator_controller_test.go b/internal/controller/nemo_evaluator_controller_test.go
index 39d3d616f..b6e87574c 100644
--- a/internal/controller/nemo_evaluator_controller_test.go
+++ b/internal/controller/nemo_evaluator_controller_test.go
@@ -206,6 +206,8 @@ var _ = Describe("NemoEvaluator Controller", func() {
 					MtBench:     "MtBench",
 					Retriever:   "Retriever",
 					Rag:         "Rag",
+					BFCL:        "BFCL",
+					AgenticEval: "AgenticEval",
 				},
 			},
 			Status: appsv1alpha1.NemoEvaluatorStatus{
@@ -462,6 +464,8 @@ var _ = Describe("NemoEvaluator Controller", func() {
 				corev1.EnvVar{Name: "MT_BENCH", Value: nemoEvaluator.Spec.EvaluationImages.MtBench},
 				corev1.EnvVar{Name: "RETRIEVER", Value: nemoEvaluator.Spec.EvaluationImages.Retriever},
 				corev1.EnvVar{Name: "RAG", Value: nemoEvaluator.Spec.EvaluationImages.Rag},
+				corev1.EnvVar{Name: "BFCL", Value: nemoEvaluator.Spec.EvaluationImages.BFCL},
+				corev1.EnvVar{Name: "AGENTIC_EVAL", Value: nemoEvaluator.Spec.EvaluationImages.AgenticEval},
 			))
 		})
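Because every EvaluationImages field is now optional, a spec can set only the harnesses it needs; GetEvaluationImageEnv still emits one env var per target, with an empty value for any unset one. A small sketch (the image references are placeholders):

package main

import (
	"fmt"

	appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
)

func main() {
	// Only the evaluation targets actually in use need an image.
	images := appsv1alpha1.EvaluationImages{
		LmEvalHarness: "example.com/lm-eval-harness:1.0", // hypothetical reference
		BFCL:          "example.com/bfcl:1.0",            // hypothetical reference
	}
	for _, e := range images.GetEvaluationImageEnv() {
		// Unset targets, such as RAG here, yield an empty Value.
		fmt.Printf("%s=%q\n", e.Name, e.Value)
	}
}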
diff --git a/internal/controller/nemocustomizer_controller.go b/internal/controller/nemocustomizer_controller.go
index 2bbbfc17a..32a3fbd12 100644
--- a/internal/controller/nemocustomizer_controller.go
+++ b/internal/controller/nemocustomizer_controller.go
@@ -22,6 +22,8 @@ import (
 	"fmt"
 	"reflect"
 
+	goerrors "errors"
+
 	"github.com/go-logr/logr"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	appsv1 "k8s.io/api/apps/v1"
@@ -509,7 +511,15 @@ func (r *NemoCustomizerReconciler) renderCustomizerConfig(ctx context.Context, n
 		return nil, err
 	}
 
-	if err := r.addModelConfig(ctx, cfg, n); err != nil {
+	if err := r.addModelConfig(ctx, n, cfg); err != nil {
+		return nil, err
+	}
+
+	if err := r.addCustomizationTargetConfig(ctx, n, cfg); err != nil {
+		return nil, err
+	}
+
+	if err := r.addCustomizationConfigTemplates(ctx, n, cfg); err != nil {
 		return nil, err
 	}
 
@@ -557,10 +567,21 @@ func (r *NemoCustomizerReconciler) addModelDownloadJobsConfig(ctx context.Contex
 		"imagePullSecrets":        pullSecrets,
 		"ngcAPISecret":            n.Spec.ModelDownloadJobs.NGCSecret.Name,
 		"ngcAPISecretKey":         n.Spec.ModelDownloadJobs.NGCSecret.Key,
+		"ngcSecretName":           n.Spec.ModelDownloadJobs.NGCSecret.Name,
+		"ngcSecretKey":            n.Spec.ModelDownloadJobs.NGCSecret.Key,
 		"securityContext":         n.Spec.ModelDownloadJobs.SecurityContext,
 		"ttlSecondsAfterFinished": n.Spec.ModelDownloadJobs.TTLSecondsAfterFinished,
 		"pollIntervalSeconds":     n.Spec.ModelDownloadJobs.PollIntervalSeconds,
 	}
+
+	// Add the HF secret only if present.
+	if n.Spec.ModelDownloadJobs.HFSecret.Name != "" {
+		if modelDownloadJobs, ok := cfg["model_download_jobs"].(map[string]interface{}); ok {
+			modelDownloadJobs["hfSecretName"] = n.Spec.ModelDownloadJobs.HFSecret.Name
+			modelDownloadJobs["hfSecretKey"] = n.Spec.ModelDownloadJobs.HFSecret.Key
+		}
+	}
+
 	return nil
 }
@@ -679,9 +700,13 @@ func (r *NemoCustomizerReconciler) addTrainingConfig(ctx context.Context, cfg ma
 	return nil
 }
 
-func (r *NemoCustomizerReconciler) addModelConfig(ctx context.Context, cfg map[string]interface{}, n *appsv1alpha1.NemoCustomizer) error {
+func (r *NemoCustomizerReconciler) addModelConfig(ctx context.Context, n *appsv1alpha1.NemoCustomizer, cfg map[string]interface{}) error {
 	modelsRaw, err := k8sutil.GetRawYAMLFromConfigMap(ctx, r.GetClient(), n.GetNamespace(), n.Spec.Models.Name, "models")
 	if err != nil {
+		if goerrors.Is(err, k8sutil.ErrConfigMapKeyNotFound) {
+			// Ignore a missing models key.
+			return nil
+		}
 		return fmt.Errorf("loading models config: %w", err)
 	}
@@ -694,6 +719,44 @@ func (r *NemoCustomizerReconciler) addModelConfig(ctx context.Context, cfg map[s
 	return nil
 }
 
+func (r *NemoCustomizerReconciler) addCustomizationTargetConfig(ctx context.Context, n *appsv1alpha1.NemoCustomizer, cfg map[string]interface{}) error {
+	customizationTargetsRaw, err := k8sutil.GetRawYAMLFromConfigMap(ctx, r.GetClient(), n.GetNamespace(), n.Spec.Models.Name, "customizationTargets")
+	if err != nil {
+		if goerrors.Is(err, k8sutil.ErrConfigMapKeyNotFound) {
+			// Ignore a missing customizationTargets key.
+			return nil
+		}
+		return fmt.Errorf("loading customization targets config: %w", err)
+	}
+
+	var customizationTargets map[string]interface{}
+	if err := yaml.Unmarshal([]byte(customizationTargetsRaw), &customizationTargets); err != nil {
+		return fmt.Errorf("parsing customization targets config: %w", err)
+	}
+
+	cfg["customizationTargets"] = customizationTargets
+	return nil
+}
+
+func (r *NemoCustomizerReconciler) addCustomizationConfigTemplates(ctx context.Context, n *appsv1alpha1.NemoCustomizer, cfg map[string]interface{}) error {
+	customizationConfigTemplatesRaw, err := k8sutil.GetRawYAMLFromConfigMap(ctx, r.GetClient(), n.GetNamespace(), n.Spec.Models.Name, "customizationConfigTemplates")
+	if err != nil {
+		if goerrors.Is(err, k8sutil.ErrConfigMapKeyNotFound) {
+			// Ignore a missing customizationConfigTemplates key.
+			return nil
+		}
+		return fmt.Errorf("loading customization config templates: %w", err)
+	}
+
+	var customizationConfigTemplates map[string]interface{}
+	if err := yaml.Unmarshal([]byte(customizationConfigTemplatesRaw), &customizationConfigTemplates); err != nil {
+		return fmt.Errorf("parsing customization config templates: %w", err)
+	}
+
+	cfg["customizationConfigTemplates"] = customizationConfigTemplates
+	return nil
+}
+
 func (r *NemoCustomizerReconciler) renderAndSyncResource(ctx context.Context, nemoCustomizer *appsv1alpha1.NemoCustomizer, renderer *render.Renderer, obj client.Object, renderFunc func() (client.Object, error), conditionType string, reason string) error {
 	logger := log.FromContext(ctx)
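The three loaders above share the same optional-key shape: tolerate a missing ConfigMap key, surface every other error, and graft the parsed YAML into the config. If they were ever folded together, the shared pattern could look like the following hypothetical helper; it is not part of this change, yaml refers to whichever YAML package the controller already imports, and the other imports assumed are context, fmt, goerrors aliasing errors, client from controller-runtime, and the repo's k8sutil:

// loadOptionalConfigKey is a hypothetical helper illustrating the shared
// pattern of the three loaders above: a missing ConfigMap key is tolerated,
// any other failure is surfaced, and the parsed YAML lands under cfgKey.
func loadOptionalConfigKey(ctx context.Context, c client.Client, namespace, cmName, key, cfgKey string, cfg map[string]interface{}) error {
	raw, err := k8sutil.GetRawYAMLFromConfigMap(ctx, c, namespace, cmName, key)
	if err != nil {
		if goerrors.Is(err, k8sutil.ErrConfigMapKeyNotFound) {
			return nil // the key is optional
		}
		return fmt.Errorf("loading %s config: %w", key, err)
	}
	var parsed map[string]interface{}
	if err := yaml.Unmarshal([]byte(raw), &parsed); err != nil {
		return fmt.Errorf("parsing %s config: %w", key, err)
	}
	cfg[cfgKey] = parsed
	return nil
}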
diff --git a/internal/controller/nemocustomizer_controller_test.go b/internal/controller/nemocustomizer_controller_test.go
index 226cfffad..eaee40a51 100644
--- a/internal/controller/nemocustomizer_controller_test.go
+++ b/internal/controller/nemocustomizer_controller_test.go
@@ -109,10 +109,12 @@ var _ = Describe("NemoCustomizer Controller", func() {
 			Expect(err).ToNot(HaveOccurred())
 			trainingCM := loadConfigMapFromFile(filepath.Join(testDataDir, "training_config.yaml"))
 			modelsCM := loadConfigMapFromFile(filepath.Join(testDataDir, "models_config.yaml"))
+			modelTargetsCM := loadConfigMapFromFile(filepath.Join(testDataDir, "models_config_targets.yaml"))
 
 			// Register the test ConfigMaps in the cluster
 			Expect(reconciler.GetClient().Create(ctx, trainingCM)).To(Succeed())
 			Expect(reconciler.GetClient().Create(ctx, modelsCM)).To(Succeed())
+			Expect(reconciler.GetClient().Create(ctx, modelTargetsCM)).To(Succeed())
 
 			nemoCustomizer = &appsv1alpha1.NemoCustomizer{
 				ObjectMeta: metav1.ObjectMeta{
@@ -365,6 +367,13 @@ var _ = Describe("NemoCustomizer Controller", func() {
 			if err := k8sClient.Get(ctx, modelsCMName, modelsCM); err == nil {
 				Expect(k8sClient.Delete(ctx, modelsCM)).To(Succeed())
 			}
+
+			// Delete the model targets config
+			modelTargetsCMName := types.NamespacedName{Name: "nemo-model-config-targets", Namespace: "default"}
+			modelTargetsCM := &corev1.ConfigMap{}
+			if err := k8sClient.Get(ctx, modelTargetsCMName, modelTargetsCM); err == nil {
+				Expect(k8sClient.Delete(ctx, modelTargetsCM)).To(Succeed())
+			}
 		})
 
 		Describe("Reconcile", func() {
@@ -575,6 +584,74 @@ var _ = Describe("NemoCustomizer Controller", func() {
 				Expect(parsed).To(HaveKeyWithValue("mlflow_tracking_url", "http://mlflow-tracking.nemo.svc.cluster.local:80"))
 			})
 
+			It("should create customizer config with model targets and templates", func() {
+				namespacedName := types.NamespacedName{Name: nemoCustomizer.Name, Namespace: "default"}
+
+				// Use the models ConfigMap that carries targets and templates
+				nemoCustomizer.Spec.Models.Name = "nemo-model-config-targets"
+
+				err := client.Create(context.TODO(), nemoCustomizer)
+				Expect(err).NotTo(HaveOccurred())
+				err = client.Create(context.TODO(), secrets)
+				Expect(err).NotTo(HaveOccurred())
+
+				result, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: namespacedName})
+				Expect(err).NotTo(HaveOccurred())
+				Expect(result).To(Equal(ctrl.Result{}))
+
+				err = client.Get(ctx, namespacedName, nemoCustomizer)
+				Expect(err).ToNot(HaveOccurred())
+				Expect(nemoCustomizer.Finalizers).To(ContainElement(NemoCustomizerFinalizer))
+
+				// Check that the customizer config map was created
+				configMap := &corev1.ConfigMap{}
+				err = client.Get(context.TODO(), types.NamespacedName{
+					Name:      nemoCustomizer.Name,
+					Namespace: nemoCustomizer.Namespace,
+				}, configMap)
+				Expect(err).NotTo(HaveOccurred())
+
+				// Verify metadata
+				Expect(configMap.Name).To(Equal(nemoCustomizer.GetName()))
+				Expect(configMap.Namespace).To(Equal(nemoCustomizer.Namespace))
+				// Verify key exists
+				Expect(configMap.Data).To(HaveKey("config.yaml"))
+				configData := configMap.Data["config.yaml"]
+				Expect(configData).NotTo(BeEmpty())
+
+				// Verify presence of top-level keys
+				Expect(configData).To(ContainSubstring("training:"))
+				Expect(configData).To(ContainSubstring("customizationTargets:"))
+				Expect(configData).To(ContainSubstring("customizationConfigTemplates:"))
+				Expect(configData).ToNot(ContainSubstring("models:")) // "models" is deprecated from NMR v25.06
+				Expect(configData).To(ContainSubstring("model_download_jobs:"))
+
+				// Unmarshal the full merged config
+				var parsed map[string]interface{}
+				err = yaml.Unmarshal([]byte(configData), &parsed)
+				Expect(err).NotTo(HaveOccurred())
+
+				// Validate the model download jobs config with the new secret params from NMR v25.06
+				modelDownloadJobs, ok := parsed["model_download_jobs"].(map[string]interface{})
+				Expect(ok).To(BeTrue(), "expected 'model_download_jobs' to be a map")
+				Expect(modelDownloadJobs).To(HaveKey("ngcSecretName"))
+				Expect(modelDownloadJobs).To(HaveKey("ngcSecretKey"))
+
+				// Validate model targets
+				modelTargets, ok := parsed["customizationTargets"].(map[string]interface{})
+				Expect(ok).To(BeTrue(), "expected 'customizationTargets' to be a map")
+				Expect(modelTargets).To(HaveKey("targets"))
+
+				// Validate model config templates
+				configTemplates, ok := parsed["customizationConfigTemplates"].(map[string]interface{})
+				Expect(ok).To(BeTrue(), "expected 'customizationConfigTemplates' to be a map")
+				Expect(configTemplates).To(HaveKey("templates"))
+
+				Expect(parsed).To(HaveKeyWithValue("entity_store_url", "http://nemoentitystore-sample.nemo.svc.cluster.local:8000"))
+				Expect(parsed).To(HaveKeyWithValue("nemo_data_store_url", "http://nemodatastore-sample.nemo.svc.cluster.local:8000"))
+				Expect(parsed).To(HaveKeyWithValue("mlflow_tracking_url", "http://mlflow-tracking.nemo.svc.cluster.local:80"))
+			})
+
 			It("should delete HPA when NemoCustomizer is updated", func() {
 				namespacedName := types.NamespacedName{Name: nemoCustomizer.Name, Namespace: "default"}
 				err := client.Create(context.TODO(), nemoCustomizer)
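For reference, per addModelDownloadJobsConfig above, the model_download_jobs block that this test asserts against carries the NGC secret under both the legacy and the new key names, presumably so that older and newer NMR releases can read the same rendered config. A sketch of the fragment as a Go map; the secret name and key values are placeholders:

// Both spellings point at the same NGC secret: the legacy
// ngcAPISecret/ngcAPISecretKey pair is kept alongside the
// ngcSecretName/ngcSecretKey pair introduced for NMR v25.06.
fragment := map[string]interface{}{
	"ngcAPISecret":    "ngc-api-secret", // hypothetical secret name (legacy key)
	"ngcAPISecretKey": "NGC_API_KEY",    // hypothetical key name (legacy key)
	"ngcSecretName":   "ngc-api-secret", // same value, new key name
	"ngcSecretKey":    "NGC_API_KEY",    // same value, new key name
}
_ = fragment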
diff --git a/internal/controller/testdata/models_config_targets.yaml b/internal/controller/testdata/models_config_targets.yaml
new file mode 100644
index 000000000..344eba777
--- /dev/null
+++ b/internal/controller/testdata/models_config_targets.yaml
@@ -0,0 +1,472 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nemo-model-config-targets
+  namespace: default
+data:
+  customizationTargets: |
+    overrideExistingTargets: true
+    targets:
+      meta/llama-3.1-8b-instruct@2.0:
+        base_model: meta/llama-3.1-8b-instruct
+        enabled: false
+        model_path: llama-3_1-8b-instruct_2_0
+        model_uri: ngc://nvidia/nemo/llama-3_1-8b-instruct-nemo:2.0
+        name: llama-3.1-8b-instruct@2.0
+        namespace: meta
+        num_parameters: 8000000000
+        precision: bf16-mixed
+      meta/llama-3.1-70b-instruct@2.0:
+        base_model: meta/llama-3.1-70b-instruct
+        enabled: false
+        model_path: llama-3_1-70b-instruct_2_0
+        model_uri: ngc://nvidia/nemo/llama-3_1-70b-instruct-nemo:2.0
+        name: llama-3.1-70b-instruct@2.0
+        namespace: meta
+        num_parameters: 70000000000
+        precision: bf16-mixed
+      meta/llama-3.2-1b-embedding@0.0.1:
+        base_model: meta/llama-3.2-1b-embedding
+        enabled: false
+        model_path: llama32_1b-embedding
+        model_uri: ngc://nvidia/nemo/llama-3_2-1b-embedding-base:0.0.1
+        name: llama-3.2-1b-embedding@0.0.1
+        namespace: meta
+        num_parameters: 1000000000
+        precision: bf16-mixed
+      meta/llama-3.2-1b-instruct@2.0:
+        base_model: meta/llama-3.2-1b-instruct
+        enabled: false
+        model_path: llama32_1b-instruct_2_0
+        model_uri: ngc://nvidia/nemo/llama-3_2-1b-instruct:2.0
+        name: llama-3.2-1b-instruct@2.0
+        namespace: meta
+        num_parameters: 1000000000
+        precision: bf16-mixed
+      meta/llama-3.2-1b@2.0:
+        base_model: meta/llama-3.2-1b
+        enabled: false
+        model_path: llama32_1b_2_0
+        model_uri: ngc://nvidia/nemo/llama-3_2-1b:2.0
+        name: llama-3.2-1b@2.0
+        namespace: meta
+        num_parameters: 1000000000
+        precision: bf16-mixed
+      meta/llama-3.2-3b-instruct@2.0:
+        base_model: meta/llama-3.2-3b-instruct
+        enabled: false
+        model_path: llama32_3b-instruct_2_0
+        model_uri: ngc://nvidia/nemo/llama-3_2-3b-instruct:2.0
+        name: llama-3.2-3b-instruct@2.0
+        namespace: meta
+        num_parameters: 3000000000
+        precision: bf16-mixed
+      meta/llama-3.3-70b-instruct@2.0:
+        base_model: meta/llama-3.3-70b-instruct
+        enabled: false
+        model_path: llama-3_3-70b-instruct_2_0
+        model_uri: ngc://nvidia/nemo/llama-3_3-70b-instruct:2.0
+        name: llama-3.3-70b-instruct@2.0
+        namespace: meta
+        num_parameters: 70000000000
+        precision: bf16-mixed
+      meta/llama3-70b-instruct@2.0:
+        base_model: meta/llama3-70b-instruct
+        enabled: false
+        model_path: llama-3-70b-bf16_2_0
+        model_uri: ngc://nvidia/nemo/llama-3-70b-instruct-nemo:2.0
+        name: llama3-70b-instruct@2.0
+        namespace: meta
+        num_parameters: 70000000000
+        precision: bf16-mixed
+      microsoft/phi-4@1.0:
+        base_model: microsoft/phi-4
+        enabled: false
+        model_path: phi-4_1_0
+        model_uri: ngc://nvidia/nemo/phi-4:1.0
+        name: phi-4@1.0
+        namespace: microsoft
+        num_parameters: 14659507200
+        precision: bf16
+        version: "1.0"
+      nvidia/nemotron-nano-llama-3.1-8b@1.0:
+        base_model: nvidia/nemotron-nano-llama-3.1-8b
+        enabled: false
+        model_path: nemotron-nano-3_1-8b_0_0_1
+        model_uri: ngc://nvidia/nemo/nemotron-nano-3_1-8b:0.0.1
+        name: nemotron-nano-llama-3.1-8b@1.0
+        namespace: nvidia
+        num_parameters: 8000000000
+        precision: bf16-mixed
+      nvidia/nemotron-super-llama-3.3-49b@1.0:
+        base_model: nvidia/nemotron-super-llama-3.3-49b
+        enabled: false
+        model_path: nemotron-super-3_3-49b_v1
+        model_uri: ngc://nvidia/nemo/nemotron-super-3_3-49b:v1
+        name: nemotron-super-llama-3.3-49b@1.0
+        namespace: nvidia
+        num_parameters: 8000000000
+        precision: bf16-mixed
+
+  customizationConfigTemplates: |
+    overrideExistingTemplates: true
+    templates:
+      meta/llama-3.1-8b-instruct@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama-3.1-8b-instruct@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.1-8b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 8
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: distillation
+      meta/llama-3.1-8b-instruct@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama-3.1-8b-instruct@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.1-8b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 2
+          tensor_parallel_size: 2
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 2
+          tensor_parallel_size: 4
+          training_type: sft
+      meta/llama-3.1-70b-instruct@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama-3.1-70b-instruct@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.1-70b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: sft
+      meta/llama-3.1-70b-instruct@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama-3.1-70b-instruct@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.1-70b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 2
+          pipeline_parallel_size: 2
+          tensor_parallel_size: 4
+          training_type: sft
+      meta/llama-3.2-1b-embedding@0.0.1+A100:
+        max_seq_length: 2048
+        name: llama-3.2-1b-embedding@0.0.1+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-1b-embedding@0.0.1
+        training_options:
+        - finetuning_type: all_weights
+          micro_batch_size: 8
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+      meta/llama-3.2-1b-embedding@0.0.1+L40:
+        max_seq_length: 2048
+        name: llama-3.2-1b-embedding@0.0.1+L40
+        namespace: meta
+        target: meta/llama-3.2-1b-embedding@0.0.1
+        training_options:
+        - finetuning_type: all_weights
+          micro_batch_size: 4
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+      meta/llama-3.2-1b-instruct@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama-3.2-1b-instruct@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-1b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: distillation
+      meta/llama-3.2-1b-instruct@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama-3.2-1b-instruct@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-1b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+      meta/llama-3.2-1b@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama-3.2-1b@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-1b@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: distillation
+      meta/llama-3.2-1b@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama-3.2-1b@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-1b@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+      meta/llama-3.2-3b-instruct@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama-3.2-3b-instruct@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-3b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 2
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: distillation
+      meta/llama-3.2-3b-instruct@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama-3.2-3b-instruct@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.2-3b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+      meta/llama-3.3-70b-instruct@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama-3.3-70b-instruct@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.3-70b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: sft
+      meta/llama-3.3-70b-instruct@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama-3.3-70b-instruct@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama-3.3-70b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 2
+          pipeline_parallel_size: 2
+          tensor_parallel_size: 4
+          training_type: sft
+      meta/llama3-70b-instruct@v1.0.0+A100:
+        max_seq_length: 4096
+        name: llama3-70b-instruct@v1.0.0+A100
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama3-70b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: sft
+      meta/llama3-70b-instruct@v1.0.0+L40:
+        max_seq_length: 4096
+        name: llama3-70b-instruct@v1.0.0+L40
+        namespace: meta
+        prompt_template: '{prompt} {completion}'
+        target: meta/llama3-70b-instruct@2.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 2
+          pipeline_parallel_size: 2
+          tensor_parallel_size: 4
+          training_type: sft
+      microsoft/phi-4@v1.0.0+A100:
+        max_seq_length: 4096
+        name: phi-4@v1.0.0+A100
+        namespace: microsoft
+        prompt_template: '{prompt} {completion}'
+        target: microsoft/phi-4@1.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          training_type: sft
+      microsoft/phi-4@v1.0.0+L40:
+        max_seq_length: 4096
+        name: phi-4@v1.0.0+L40
+        namespace: microsoft
+        prompt_template: '{prompt} {completion}'
+        target: microsoft/phi-4@1.0
+        training_options:
+        - data_parallel_size: 2
+          finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 2
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+      nvidia/nemotron-nano-llama-3.1-8b@v1.0.0+A100:
+        max_seq_length: 4096
+        name: nemotron-nano-llama-3.1-8b@v1.0.0+A100
+        namespace: nvidia
+        prompt_template: '{prompt} {completion}'
+        target: nvidia/nemotron-nano-llama-3.1-8b@1.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 1
+          num_nodes: 1
+          tensor_parallel_size: 1
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 8
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: sft
+      nvidia/nemotron-nano-llama-3.1-8b@v1.0.0+L40:
+        max_seq_length: 4096
+        name: nemotron-nano-llama-3.1-8b@v1.0.0+L40
+        namespace: nvidia
+        prompt_template: '{prompt} {completion}'
+        target: nvidia/nemotron-nano-llama-3.1-8b@1.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 2
+          num_nodes: 1
+          tensor_parallel_size: 2
+          training_type: sft
+        - finetuning_type: all_weights
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 2
+          pipeline_parallel_size: 2
+          tensor_parallel_size: 4
+          training_type: sft
+      nvidia/nemotron-super-llama-3.3-49b@v1.0.0+A100:
+        max_seq_length: 4096
+        name: nemotron-super-llama-3.3-49b@v1.0.0+A100
+        namespace: nvidia
+        prompt_template: '{prompt} {completion}'
+        target: nvidia/nemotron-super-llama-3.3-49b@1.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 1
+          tensor_parallel_size: 4
+          training_type: sft
+      nvidia/nemotron-super-llama-3.3-49b@v1.0.0+L40:
+        max_seq_length: 4096
+        name: nemotron-super-llama-3.3-49b@v1.0.0+L40
+        namespace: nvidia
+        prompt_template: '{prompt} {completion}'
+        target: nvidia/nemotron-super-llama-3.3-49b@1.0
+        training_options:
+        - finetuning_type: lora
+          micro_batch_size: 1
+          num_gpus: 4
+          num_nodes: 2
+          pipeline_parallel_size: 2
+          tensor_parallel_size: 4
+          training_type: sft
diff --git a/internal/k8sutil/k8sutil.go b/internal/k8sutil/k8sutil.go
index 15ac89ef4..667ee3580 100644
--- a/internal/k8sutil/k8sutil.go
+++ b/internal/k8sutil/k8sutil.go
@@ -18,6 +18,7 @@ package k8sutil
 
 import (
 	"context"
+	goerrors "errors"
 	"fmt"
 
 	appsv1 "k8s.io/api/apps/v1"
@@ -31,6 +32,9 @@ import (
 	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
 )
 
+// ErrConfigMapKeyNotFound indicates that the given key is missing from the ConfigMap.
+var ErrConfigMapKeyNotFound = goerrors.New("configmap key not found")
+
 // OrchestratorType is the underlying container orchestrator type.
 type OrchestratorType string
 
@@ -283,7 +287,7 @@ func GetRawYAMLFromConfigMap(ctx context.Context, k8sClient client.Client, names
 	raw, ok := cm.Data[configMapKey]
 	if !ok {
-		return "", fmt.Errorf("key %q not found in ConfigMap %q", configMapKey, configMapName)
+		return "", fmt.Errorf("%w: key %q not found in ConfigMap %q", ErrConfigMapKeyNotFound, configMapKey, configMapName)
 	}
 	return raw, nil
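The new sentinel lets callers distinguish a missing key from harder failures, while the %w wrapping keeps the original message intact. A small usage sketch; c is any configured controller-runtime client.Client, and goerrors aliases the standard errors package as in the diff above:

raw, err := k8sutil.GetRawYAMLFromConfigMap(ctx, c, "default", "nemo-model-config-targets", "customizationTargets")
switch {
case goerrors.Is(err, k8sutil.ErrConfigMapKeyNotFound):
	// Key absent: treat the section as optional and fall back to defaults.
case err != nil:
	// Anything else (ConfigMap missing, API error) is a real failure.
default:
	_ = raw // use the raw YAML payload
}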