Skip to content

Commit 86e0282

Browse files
nvchenghaoz and claude authored
[None][chore] Autodeploy: add models for sprint (NVIDIA#11999)
Signed-off-by: Chenghao Zhang <211069071+nvchenghaoz@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent dd8ffbd commit 86e0282

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed

examples/auto_deploy/model_registry/models.yaml

Lines changed: 284 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -229,3 +229,287 @@ models:
229229
yaml_extra: ['glm-4.7-flash.yaml']
230230
- name: Nanbeige/Nanbeige4.1-3B
231231
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
232+
# =============================================================================
233+
# Model list for sprint
234+
# =============================================================================
235+
# --- Qwen3.5 dense (Feb 2026) ---
236+
- name: Qwen/Qwen3.5-0.8B
237+
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
238+
- name: Qwen/Qwen3.5-27B
239+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
240+
# --- Qwen3.5 MoE (Feb 2026) ---
241+
- name: Qwen/Qwen3.5-35B-A3B
242+
yaml_extra: ['qwen3.5_moe_35b.yaml']
243+
- name: Qwen/Qwen3.5-397B-A17B
244+
yaml_extra: ['qwen3.5_moe_400b.yaml']
245+
# --- GLM-5 (Feb 2026) ---
246+
- name: zai-org/GLM-5
247+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
248+
- name: zai-org/GLM-5-FP8
249+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
250+
# --- MiniMax-M2.5 (Feb 2026) ---
251+
- name: MiniMaxAI/MiniMax-M2.5
252+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
253+
# --- MiMo-V2-Flash (Feb 2026) ---
254+
- name: XiaomiMiMo/MiMo-V2-Flash
255+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
256+
# --- Kimi-K2.5 (Jan 2026) ---
257+
- name: moonshotai/Kimi-K2.5
258+
yaml_extra: ['kimi_k2.yaml']
259+
# --- GLM-4.7 (Dec 2025) ---
260+
- name: zai-org/GLM-4.7
261+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
262+
# --- DeepSeek V3.2 (Dec 2025) ---
263+
- name: deepseek-ai/DeepSeek-V3.2
264+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
265+
- name: deepseek-ai/DeepSeek-V3.2-Speciale
266+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
267+
- name: nvidia/DeepSeek-V3.2-NVFP4
268+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
269+
# --- GLM-4.6 (Sep 2025) ---
270+
- name: zai-org/GLM-4.6
271+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
272+
# --- Qwen3-Next (Sep 2025) ---
273+
- name: Qwen/Qwen3-Next-80B-A3B-Instruct
274+
yaml_extra: ['qwen3Next.yaml']
275+
# --- OLMo 3 (Nov 2025) ---
276+
- name: allenai/Olmo-3-7B-Instruct
277+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
278+
- name: allenai/Olmo-3.1-32B-Instruct
279+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
280+
# --- Command A (2025) ---
281+
- name: CohereLabs/c4ai-command-a-03-2025
282+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
283+
- name: CohereLabs/command-a-vision-07-2025
284+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
285+
# --- Aya Expanse (2025) - multilingual ---
286+
- name: CohereForAI/aya-expanse-8b
287+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
288+
- name: CohereForAI/aya-expanse-32b
289+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
290+
# --- Tencent Hunyuan (2025) ---
291+
- name: tencent/Hunyuan-A13B-Instruct
292+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
293+
- name: tencent/Hunyuan-7B-Instruct
294+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
295+
# --- Nemotron-H (2025) - hybrid Mamba-Transformer ---
296+
- name: nvidia/Nemotron-H-8B-Base-8K
297+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
298+
- name: nvidia/Nemotron-H-47B-Reasoning-128K
299+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
300+
# --- Granite 4.0 (2025) - hybrid Mamba/Transformer ---
301+
- name: ibm-granite/granite-4.0-micro
302+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
303+
- name: ibm-granite/granite-4.0-h-small
304+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
305+
# --- AI21 Jamba (2025) - hybrid SSM-Transformer ---
306+
- name: ai21labs/AI21-Jamba-Large-1.7
307+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
308+
- name: ai21labs/AI21-Jamba-Reasoning-3B
309+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
310+
# --- Skywork (2025) ---
311+
- name: Skywork/Skywork-R1V2-38B
312+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
313+
- name: Skywork/Skywork-SWE-32B
314+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
315+
# --- Seed (2025) ---
316+
- name: ByteDance-Seed/Seed-Coder-8B-Instruct
317+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
318+
- name: ByteDance-Seed/Seed-OSS-36B-Instruct
319+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
320+
# --- Qwen3 Instruct 2507 update ---
321+
- name: Qwen/Qwen3-4B-Instruct-2507
322+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
323+
# --- SmolLM3 (Jul 2025) ---
324+
- name: HuggingFaceTB/SmolLM3-3B
325+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
326+
- name: HuggingFaceTB/SmolLM3-3B-Base
327+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
328+
# --- Gemma 3n (Jun 2025) - on-device VLM ---
329+
- name: google/gemma-3n-E2B-it
330+
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml', 'multimodal.yaml']
331+
- name: google/gemma-3n-E4B-it
332+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
333+
# --- JetBrains Mellum (Apr 2025) - code specialist ---
334+
- name: JetBrains/Mellum-4b-sft-all
335+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
336+
# --- Qwen3 missing sizes (May 2025) ---
337+
- name: Qwen/Qwen3-1.7B
338+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
339+
- name: Qwen/Qwen3-32B
340+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
341+
# --- DeepSeek R1-0528 (May 2025) ---
342+
- name: deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
343+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
344+
# --- Llama 4 base models (Apr 2025) ---
345+
- name: meta-llama/Llama-4-Scout-17B-16E
346+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
347+
- name: meta-llama/Llama-4-Maverick-17B-128E
348+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_maverick_lite.yaml']
349+
# --- Phi-4 variants (2025) ---
350+
- name: microsoft/Phi-4-multimodal-instruct
351+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
352+
- name: microsoft/Phi-4-reasoning-vision-15B
353+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
354+
# --- MiniMax M2 (2025) ---
355+
- name: MiniMaxAI/MiniMax-M2
356+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
357+
# --- Tencent Hunyuan small (2025) ---
358+
- name: tencent/Hunyuan-1.8B-Instruct
359+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
360+
- name: tencent/Hunyuan-MT-7B
361+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
362+
# --- UI-TARS (2025) - GUI agent VLM ---
363+
- name: ByteDance-Seed/UI-TARS-1.5-7B
364+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
365+
# --- Nvidia Nemotron Flash (2025) ---
366+
- name: nvidia/Nemotron-Flash-3B-Instruct
367+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
368+
# --- InternLM3 (Jan 2025) ---
369+
- name: internlm/internlm3-8b-instruct
370+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
371+
# --- Gemma 3 missing sizes (Mar 2025) ---
372+
- name: google/gemma-3-4b-it
373+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
374+
- name: google/gemma-3-12b-it
375+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
376+
# --- Mistral Small (2025) ---
377+
- name: mistralai/Mistral-Small-24B-Instruct-2501
378+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
379+
- name: mistralai/Mistral-Small-3.1-24B-Instruct-2503
380+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
381+
# --- DeepSeek R1 distills (Jan 2025) ---
382+
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
383+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
384+
- name: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
385+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
386+
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
387+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
388+
# --- DeepSeek Prover V2 671B (2025) ---
389+
- name: deepseek-ai/DeepSeek-Prover-V2-671B
390+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
391+
# --- OLMo 2 (Mar 2025) ---
392+
- name: allenai/OLMo-2-0325-32B-Instruct
393+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
394+
- name: allenai/OLMo-2-0325-32B-DPO
395+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
396+
# --- Command A variants (2025) ---
397+
- name: CohereLabs/command-a-translate-08-2025
398+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
399+
- name: CohereLabs/command-a-reasoning-08-2025
400+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
401+
# --- Falcon3 (Dec 2024) ---
402+
- name: tiiuae/Falcon3-1B-Instruct
403+
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
404+
- name: tiiuae/Falcon3-10B-Instruct
405+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
406+
# --- EXAONE 3.5 (Dec 2024) ---
407+
- name: LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct
408+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
409+
- name: LGAI-EXAONE/EXAONE-3.5-32B-Instruct
410+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
411+
# --- SmolLM2 (Nov 2024) ---
412+
- name: HuggingFaceTB/SmolLM2-135M-Instruct
413+
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
414+
- name: HuggingFaceTB/SmolLM2-1.7B-Instruct
415+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
416+
# --- Qwen2.5-Coder (Nov 2024) ---
417+
- name: Qwen/Qwen2.5-Coder-1.5B-Instruct
418+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
419+
- name: Qwen/Qwen2.5-Coder-32B-Instruct
420+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
421+
# --- OLMo 3 Think (Nov 2025) ---
422+
- name: allenai/Olmo-3-32B-Think
423+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
424+
# --- Qwen3-14B (May 2025) ---
425+
- name: Qwen/Qwen3-14B
426+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
427+
# --- DeepSeek V3 (Dec 2024) ---
428+
- name: deepseek-ai/DeepSeek-V3
429+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
430+
# --- Qwen2.5 larger sizes ---
431+
- name: Qwen/Qwen2.5-14B-Instruct
432+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
433+
- name: Qwen/Qwen2.5-72B-Instruct
434+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
435+
# --- Qwen2.5-Math ---
436+
- name: Qwen/Qwen2.5-Math-7B-Instruct
437+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
438+
# --- Nvidia Nemotron Nano (2025) - Nemotron 3 Nano and Nano v2 ---
439+
- name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
440+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'nano_v3.yaml']
441+
- name: nvidia/NVIDIA-Nemotron-Nano-12B-v2
442+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
443+
# --- Perplexity R1-1776 distill Qwen (2025) ---
444+
- name: perplexity-ai/r1-1776-distill-qwen-32b
445+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
446+
# --- Nanbeige 8B (2025) ---
447+
- name: Nanbeige/Nanbeige4.1-8B
448+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
449+
# --- Qwen3 updates (2025) - MoE 2507 refresh and FP8 dense ---
450+
- name: Qwen/Qwen3-30B-A3B-Instruct-2507
451+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
452+
- name: Qwen/Qwen3-0.6B-FP8
453+
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
454+
# --- Mistral updates (2025) ---
455+
- name: mistralai/Codestral-25.01
456+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml']
457+
- name: mistralai/Mistral-Large-Instruct-2501
458+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml']
459+
# --- Qwen3-VL 2B (2025) ---
460+
- name: Qwen/Qwen3-VL-2B-Instruct
461+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
462+
# --- Granite 4.0 tiny (2025) ---
463+
- name: ibm-granite/granite-4.0-tiny-preview
464+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
465+
# --- OLMo 3 Think 7B (2025) ---
466+
- name: allenai/Olmo-3-7B-Think
467+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
468+
# --- Nemotron-H reasoning 8B (2025) ---
469+
- name: nvidia/Nemotron-H-8B-Reasoning-128K
470+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
471+
# --- Pixtral (Sep 2024) - VLM ---
472+
- name: mistralai/Pixtral-12B-2409
473+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
474+
# --- DeepSeek Coder V2 Lite (2024) ---
475+
- name: deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct
476+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
477+
# --- Seed-Coder reasoning (2025) ---
478+
- name: ByteDance-Seed/Seed-Coder-8B-Reasoning
479+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
480+
# --- Tencent Hunyuan translation (2025) ---
481+
- name: tencent/Hunyuan-MT-Chimera-7B
482+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml']
483+
# --- Phi-4-mini flash reasoning (2025) ---
484+
- name: microsoft/Phi-4-mini-flash-reasoning
485+
yaml_extra: ['dashboard_default.yaml', 'world_size_1.yaml']
486+
# --- Qwen2.5-VL (2025) - top VLM family ---
487+
- name: Qwen/Qwen2.5-VL-7B-Instruct
488+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
489+
- name: Qwen/Qwen2.5-VL-72B-Instruct
490+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
491+
# --- Qwen3-VL MoE (2025) - flagship VLM ---
492+
- name: Qwen/Qwen3-VL-30B-A3B-Instruct
493+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
494+
# --- InternVL3 (Apr 2025) - #1 open-source VLM ---
495+
- name: OpenGVLab/InternVL3-8B
496+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
497+
- name: OpenGVLab/InternVL3-78B
498+
yaml_extra: ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
499+
# --- InternVL3.5 (2025) - latest gen ---
500+
- name: OpenGVLab/InternVL3_5-8B
501+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
502+
# --- SmolVLM2 (2025) - tiny VLM ---
503+
- name: HuggingFaceTB/SmolVLM2-2.2B-Instruct
504+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
505+
# --- Molmo2 (2025) - fully open VLM ---
506+
- name: allenai/Molmo2-8B
507+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
508+
# --- DeepSeek-VL2 (2025) - MoE VLM ---
509+
- name: deepseek-ai/deepseek-vl2-small
510+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
511+
# --- Aya Vision (2025) - multilingual VLM ---
512+
- name: CohereLabs/aya-vision-8b
513+
yaml_extra: ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
514+
- name: CohereLabs/aya-vision-32b
515+
yaml_extra: ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']

0 commit comments

Comments
 (0)