Replace the deprecated backend tag with the model_type tag for non-LLM NIMs (#513)

visheshtanksale · visheshtanksale · commit 4a4ac8f9ff39 · 2025-06-03T09:50:37.000-07:00
* Replace the deprecated backend tag with the model_type tag for non-LLM NIMs

Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>

* Replace the deprecated backend tag with the model_type tag for non-LLM NIMs

Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>

* Addressing review comments

Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>

---------

Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
diff --git a/api/apps/v1alpha1/nimcache_types.go b/api/apps/v1alpha1/nimcache_types.go
@@ -126,7 +126,7 @@ type ModelSpec struct {
 	Profiles []string `json:"profiles,omitempty"`
 	// Precision is the precision for model quantization
 	Precision string `json:"precision,omitempty"`
-	// Engine is the backend engine (tensort_llm, vllm)
+	// Engine is the backend engine (tensorrt_llm, vllm)
 	Engine string `json:"engine,omitempty"`
 	// TensorParallelism is the minimum GPUs required for the model computations
 	TensorParallelism string `json:"tensorParallelism,omitempty"`
diff --git a/bundle/manifests/apps.nvidia.com_nimcaches.yaml b/bundle/manifests/apps.nvidia.com_nimcaches.yaml
@@ -344,7 +344,7 @@ spec:
                               GPUs
                             type: boolean
                           engine:
-                            description: Engine is the backend engine (tensort_llm,
+                            description: Engine is the backend engine (tensorrt_llm,
                               vllm)
                             type: string
                           gpus:
diff --git a/config/crd/bases/apps.nvidia.com_nimcaches.yaml b/config/crd/bases/apps.nvidia.com_nimcaches.yaml
@@ -344,7 +344,7 @@ spec:
                               GPUs
                             type: boolean
                           engine:
-                            description: Engine is the backend engine (tensort_llm,
+                            description: Engine is the backend engine (tensorrt_llm,
                               vllm)
                             type: string
                           gpus:
diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml
@@ -344,7 +344,7 @@ spec:
                               GPUs
                             type: boolean
                           engine:
-                            description: Engine is the backend engine (tensort_llm,
+                            description: Engine is the backend engine (tensorrt_llm,
                               vllm)
                             type: string
                           gpus:
diff --git a/internal/nimparser/v2/nimparser.go b/internal/nimparser/v2/nimparser.go
@@ -80,10 +80,22 @@ func (manifest NIMManifest) MatchProfiles(modelSpec appsv1alpha1.ModelSpec, disc
 
 		// Determine backend type
 		backend := profile.Tags["llm_engine"]
+
+		// Use the "model_type" tag when "llm_engine" is empty (non-LLM models)
+		if backend == "" {
+			backend = profile.Tags["model_type"]
+		}
+
+		// Fall back to the deprecated "backend" tag for non-LLM models
 		if backend == "" {
 			backend = profile.Tags["backend"]
 		}
 
+		// modelSpec.Engine can be "tensorrt_llm", which is matched below with its "_llm" suffix trimmed; map a "triton" backend tag to "tensorrt" so it matches.
+		if backend == "triton" {
+			backend = "tensorrt"
+		}
+
 		if modelSpec.Engine != "" && !strings.Contains(backend, strings.TrimSuffix(modelSpec.Engine, "_llm")) {
 			continue
 		}