Skip to content

Commit 4a4ac8f

Browse files
Replace the deprecated backend with model_type tags for non LLM nims (#513)
* Replace the deprecated backend with model_type tags for non LLM nims Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com> * Replace the deprecated backend with model_type tags for non LLM nims Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com> * Addressing review comments Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com> --------- Signed-off-by: Vishesh Tanksale <vtanksale@nvidia.com>
1 parent 3cc0cde commit 4a4ac8f

File tree

5 files changed

+16
-4
lines changed

5 files changed

+16
-4
lines changed

api/apps/v1alpha1/nimcache_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ type ModelSpec struct {
126126
Profiles []string `json:"profiles,omitempty"`
127127
// Precision is the precision for model quantization
128128
Precision string `json:"precision,omitempty"`
129-
// Engine is the backend engine (tensort_llm, vllm)
129+
// Engine is the backend engine (tensorrt_llm, vllm)
130130
Engine string `json:"engine,omitempty"`
131131
// TensorParallelism is the minimum GPUs required for the model computations
132132
TensorParallelism string `json:"tensorParallelism,omitempty"`

bundle/manifests/apps.nvidia.com_nimcaches.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ spec:
344344
GPUs
345345
type: boolean
346346
engine:
347-
description: Engine is the backend engine (tensort_llm,
347+
description: Engine is the backend engine (tensorrt_llm,
348348
vllm)
349349
type: string
350350
gpus:

config/crd/bases/apps.nvidia.com_nimcaches.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ spec:
344344
GPUs
345345
type: boolean
346346
engine:
347-
description: Engine is the backend engine (tensort_llm,
347+
description: Engine is the backend engine (tensorrt_llm,
348348
vllm)
349349
type: string
350350
gpus:

deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ spec:
344344
GPUs
345345
type: boolean
346346
engine:
347-
description: Engine is the backend engine (tensort_llm,
347+
description: Engine is the backend engine (tensorrt_llm,
348348
vllm)
349349
type: string
350350
gpus:

internal/nimparser/v2/nimparser.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,22 @@ func (manifest NIMManifest) MatchProfiles(modelSpec appsv1alpha1.ModelSpec, disc
8080

8181
// Determine backend type
8282
backend := profile.Tags["llm_engine"]
83+
84+
// Use "model_type" if "llm_engine" is empty, as is the case for non-LLM models
85+
if backend == "" {
86+
backend = profile.Tags["model_type"]
87+
}
88+
89+
// Fall back to the deprecated "backend" tag for non-LLM models
8390
if backend == "" {
8491
backend = profile.Tags["backend"]
8592
}
8693

94+
// modelSpec.Engine can be "tensorrt_llm", which is matched as "tensorrt" once the "_llm" suffix is trimmed below. If the backend tag is "triton", convert it to "tensorrt" so it matches.
95+
if backend == "triton" {
96+
backend = "tensorrt"
97+
}
98+
8799
if modelSpec.Engine != "" && !strings.Contains(backend, strings.TrimSuffix(modelSpec.Engine, "_llm")) {
88100
continue
89101
}

0 commit comments

Comments
 (0)