Skip to content

Commit 502d746

Browse files
committed
Use HuggingFace model names and full NVCR container paths
Per review feedback, update model paths to HuggingFace format (nvidia/Kimi-K2.5-NVFP4), the speculative-model mount source in the MTP recipes to its HuggingFace name (nvidia/Kimi-K2.5-Thinking-Eagle3), and the container to the full NVCR registry path (nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2), so recipes are portable and work without pre-built sqsh files.
1 parent 4217429 commit 502d746

29 files changed

Lines changed: 73 additions & 73 deletions

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx1dep4_gen1dep16_batch32_eplb0_mtp3.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen1dep16_batch32_eplb0_mtp3"
66
# concurrency: 666
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -115,7 +115,7 @@ backend:
115115
speculative_model_dir: "/eagle-model"
116116

117117
extra_mount:
118-
- "kimi-k2.5-eagle3:/eagle-model"
118+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
119119

120120
benchmark:
121121
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx1dep4_gen1dep32_batch16_eplb0_mtp3.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen1dep32_batch16_eplb0_mtp3"
66
# concurrency: 666
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -113,7 +113,7 @@ backend:
113113
speculative_model_dir: "/eagle-model"
114114

115115
extra_mount:
116-
- "kimi-k2.5-eagle3:/eagle-model"
116+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
117117

118118
benchmark:
119119
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx1dep4_gen1dep8_batch512_eplb0_mtp1.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen1dep8_batch512_eplb0_mtp1"
66
# concurrency: 4301
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -175,7 +175,7 @@ backend:
175175
speculative_model_dir: "/eagle-model"
176176

177177
extra_mount:
178-
- "kimi-k2.5-eagle3:/eagle-model"
178+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
179179

180180
benchmark:
181181
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx1dep4_gen4tep8_batch64_allconc_eplb0_mtp3.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen4tep8_batch64_allconc_eplb0_mtp3"
66
# Covers all gen4tep8 concurrencies: 8, 48, 92, 192, 336
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -120,7 +120,7 @@ backend:
120120
speculative_model_dir: "/eagle-model"
121121

122122
extra_mount:
123-
- "kimi-k2.5-eagle3:/eagle-model"
123+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
124124

125125
benchmark:
126126
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx1dep4_gen5tep4_batch2_allconc_eplb0_mtp3.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen5tep4_batch2_allconc_eplb0_mtp3"
66
# Covers all gen5tep4 concurrencies: 10, 15
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -111,7 +111,7 @@ backend:
111111
speculative_model_dir: "/eagle-model"
112112

113113
extra_mount:
114-
- "kimi-k2.5-eagle3:/eagle-model"
114+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
115115

116116
benchmark:
117117
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx2dep4_gen1dep16_batch128_eplb0_mtp1.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx2dep4_gen1dep16_batch128_eplb0_mtp1"
66
# concurrency: 2253
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -127,7 +127,7 @@ backend:
127127
speculative_model_dir: "/eagle-model"
128128

129129
extra_mount:
130-
- "kimi-k2.5-eagle3:/eagle-model"
130+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
131131

132132
benchmark:
133133
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx2dep4_gen1dep32_batch64_eplb0_mtp1.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx2dep4_gen1dep32_batch64_eplb0_mtp1"
66
# concurrency: 2253
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -119,7 +119,7 @@ backend:
119119
speculative_model_dir: "/eagle-model"
120120

121121
extra_mount:
122-
- "kimi-k2.5-eagle3:/eagle-model"
122+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
123123

124124
benchmark:
125125
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/MTP/ctx2dep4_gen3dep8_batch256_eplb0_mtp1.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx2dep4_gen3dep8_batch256_eplb0_mtp1"
66
# concurrency: 6759
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:
@@ -143,7 +143,7 @@ backend:
143143
speculative_model_dir: "/eagle-model"
144144

145145
extra_mount:
146-
- "kimi-k2.5-eagle3:/eagle-model"
146+
- "nvidia/Kimi-K2.5-Thinking-Eagle3:/eagle-model"
147147

148148
benchmark:
149149
type: "sa-bench"

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep16_batch32_eplb0_mtp0.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen1dep16_batch32_eplb0_mtp0"
66
# concurrency: 666
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:

recipes/kimi2.5/trtllm_dynamo/disagg/gb200Nvfp4/ISL1K_OSL1K/STP/ctx1dep4_gen1dep32_batch64_eplb0_mtp0.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ name: "kimi_k25_nvfp4_ISL1K_OSL1K_ctx1dep4_gen1dep32_batch64_eplb0_mtp0"
66
# concurrency: 2253
77

88
model:
9-
path: "kimi-k2.5-nvfp4"
10-
container: "tensorrtllm-runtime:1.1.0-dev.2"
9+
path: "nvidia/Kimi-K2.5-NVFP4"
10+
container: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.1.0-dev.2"
1111
precision: "fp4"
1212

1313
resources:

0 commit comments

Comments
 (0)