diff --git a/api/apps/v1alpha1/nimcache_types.go b/api/apps/v1alpha1/nimcache_types.go index b332364b2..57be7518a 100644 --- a/api/apps/v1alpha1/nimcache_types.go +++ b/api/apps/v1alpha1/nimcache_types.go @@ -127,7 +127,7 @@ type ModelSpec struct { Profiles []string `json:"profiles,omitempty"` // Precision is the precision for model quantization Precision string `json:"precision,omitempty"` - // Engine is the backend engine (tensort_llm, vllm) + // Engine is the backend engine (tensorrt_llm, vllm) Engine string `json:"engine,omitempty"` // TensorParallelism is the minimum GPUs required for the model computations TensorParallelism string `json:"tensorParallelism,omitempty"` diff --git a/bundle/manifests/apps.nvidia.com_nimcaches.yaml b/bundle/manifests/apps.nvidia.com_nimcaches.yaml index df2e9c357..55698a8c6 100644 --- a/bundle/manifests/apps.nvidia.com_nimcaches.yaml +++ b/bundle/manifests/apps.nvidia.com_nimcaches.yaml @@ -344,7 +344,7 @@ spec: GPUs type: boolean engine: - description: Engine is the backend engine (tensort_llm, + description: Engine is the backend engine (tensorrt_llm, vllm) type: string gpus: diff --git a/config/crd/bases/apps.nvidia.com_nimcaches.yaml b/config/crd/bases/apps.nvidia.com_nimcaches.yaml index df2e9c357..55698a8c6 100644 --- a/config/crd/bases/apps.nvidia.com_nimcaches.yaml +++ b/config/crd/bases/apps.nvidia.com_nimcaches.yaml @@ -344,7 +344,7 @@ spec: GPUs type: boolean engine: - description: Engine is the backend engine (tensort_llm, + description: Engine is the backend engine (tensorrt_llm, vllm) type: string gpus: diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml index df2e9c357..55698a8c6 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml @@ -344,7 +344,7 @@ spec: GPUs type: boolean engine: - description: Engine is 
the backend engine (tensort_llm, + description: Engine is the backend engine (tensorrt_llm, vllm) type: string gpus: diff --git a/internal/nimparser/v2/nimparser.go b/internal/nimparser/v2/nimparser.go index e38257c14..307764cbe 100644 --- a/internal/nimparser/v2/nimparser.go +++ b/internal/nimparser/v2/nimparser.go @@ -80,10 +80,22 @@ func (manifest NIMManifest) MatchProfiles(modelSpec appsv1alpha1.ModelSpec, disc // Determine backend type backend := profile.Tags["llm_engine"] + + // Use "model_type" if "llm_engine" is empty for non-LLM models + if backend == "" { + backend = profile.Tags["model_type"] + } + + // Fall back to deprecated "backend" tag for non-LLM models if backend == "" { backend = profile.Tags["backend"] } + // modelSpec.Engine value can be "tensorrt_llm". If backend tag is "triton", convert it to "tensorrt". + if backend == "triton" { + backend = "tensorrt" + } + if modelSpec.Engine != "" && !strings.Contains(backend, strings.TrimSuffix(modelSpec.Engine, "_llm")) { continue }