@@ -229,3 +229,287 @@ models:
229229 yaml_extra : ['glm-4.7-flash.yaml']
230230- name : Nanbeige/Nanbeige4.1-3B
231231 yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
232+ # =============================================================================
233+ # Model list for the sprint: one `name` / `yaml_extra` entry per model, grouped by family
234+ # =============================================================================
235+ # --- Qwen3.5 dense (Feb 2026) ---
236+ - name : Qwen/Qwen3.5-0.8B
237+ yaml_extra : ['dashboard_default.yaml', 'world_size_1.yaml']
238+ - name : Qwen/Qwen3.5-27B
239+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
240+ # --- Qwen3.5 MoE (Feb 2026) ---
241+ - name : Qwen/Qwen3.5-35B-A3B
242+ yaml_extra : ['qwen3.5_moe_35b.yaml']
243+ - name : Qwen/Qwen3.5-397B-A17B
244+ yaml_extra : ['qwen3.5_moe_400b.yaml']
245+ # --- GLM-5 (Feb 2026) ---
246+ - name : zai-org/GLM-5
247+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
248+ - name : zai-org/GLM-5-FP8
249+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
250+ # --- MiniMax-M2.5 (Feb 2026) ---
251+ - name : MiniMaxAI/MiniMax-M2.5
252+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
253+ # --- MiMo-V2-Flash (Feb 2026) ---
254+ - name : XiaomiMiMo/MiMo-V2-Flash
255+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
256+ # --- Kimi-K2.5 (Jan 2026) ---
257+ - name : moonshotai/Kimi-K2.5
258+ yaml_extra : ['kimi_k2.yaml']
259+ # --- GLM-4.7 (Dec 2025) ---
260+ - name : zai-org/GLM-4.7
261+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
262+ # --- DeepSeek V3.2 (Dec 2025) ---
263+ - name : deepseek-ai/DeepSeek-V3.2
264+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
265+ - name : deepseek-ai/DeepSeek-V3.2-Speciale
266+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
267+ - name : nvidia/DeepSeek-V3.2-NVFP4
268+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
269+ # --- GLM-4.6 (Sep 2025) ---
270+ - name : zai-org/GLM-4.6
271+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
272+ # --- Qwen3-Next (Sep 2025) ---
273+ - name : Qwen/Qwen3-Next-80B-A3B-Instruct
274+ yaml_extra : ['qwen3Next.yaml']
275+ # --- OLMo 3 (Nov 2025) ---
276+ - name : allenai/Olmo-3-7B-Instruct
277+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
278+ - name : allenai/Olmo-3.1-32B-Instruct
279+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
280+ # --- Command A (2025) ---
281+ - name : CohereLabs/c4ai-command-a-03-2025
282+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
283+ - name : CohereLabs/command-a-vision-07-2025
284+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
285+ # --- Aya Expanse (Oct 2024) - multilingual ---
286+ - name : CohereForAI/aya-expanse-8b
287+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
288+ - name : CohereForAI/aya-expanse-32b
289+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
290+ # --- Tencent Hunyuan (2025) ---
291+ - name : tencent/Hunyuan-A13B-Instruct
292+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
293+ - name : tencent/Hunyuan-7B-Instruct
294+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
295+ # --- Nemotron-H (2025) - hybrid Mamba-Transformer ---
296+ - name : nvidia/Nemotron-H-8B-Base-8K
297+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
298+ - name : nvidia/Nemotron-H-47B-Reasoning-128K
299+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
300+ # --- Granite 4.0 (2025) - hybrid Mamba/Transformer ---
301+ - name : ibm-granite/granite-4.0-micro
302+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
303+ - name : ibm-granite/granite-4.0-h-small
304+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
305+ # --- AI21 Jamba (2025) - hybrid SSM-Transformer ---
306+ - name : ai21labs/AI21-Jamba-Large-1.7
307+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
308+ - name : ai21labs/AI21-Jamba-Reasoning-3B
309+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
310+ # --- Skywork (2025) ---
311+ - name : Skywork/Skywork-R1V2-38B
312+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
313+ - name : Skywork/Skywork-SWE-32B
314+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
315+ # --- Seed (2025) ---
316+ - name : ByteDance-Seed/Seed-Coder-8B-Instruct
317+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
318+ - name : ByteDance-Seed/Seed-OSS-36B-Instruct
319+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
320+ # --- Qwen3 Instruct 2507 update (Jul 2025) ---
321+ - name : Qwen/Qwen3-4B-Instruct-2507
322+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
323+ # --- SmolLM3 (Jul 2025) ---
324+ - name : HuggingFaceTB/SmolLM3-3B
325+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
326+ - name : HuggingFaceTB/SmolLM3-3B-Base
327+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
328+ # --- Gemma 3n (Jun 2025) - on-device VLM ---
329+ - name : google/gemma-3n-E2B-it
330+ yaml_extra : ['dashboard_default.yaml', 'world_size_1.yaml', 'multimodal.yaml']
331+ - name : google/gemma-3n-E4B-it
332+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
333+ # --- JetBrains Mellum (Apr 2025) - code specialist ---
334+ - name : JetBrains/Mellum-4b-sft-all
335+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
336+ # --- Qwen3 missing sizes (May 2025) ---
337+ - name : Qwen/Qwen3-1.7B
338+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
339+ - name : Qwen/Qwen3-32B
340+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
341+ # --- DeepSeek R1-0528 (May 2025) ---
342+ - name : deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
343+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
344+ # --- Llama 4 base models (Apr 2025) ---
345+ - name : meta-llama/Llama-4-Scout-17B-16E
346+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_scout.yaml']
347+ - name : meta-llama/Llama-4-Maverick-17B-128E
348+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'llama4_maverick_lite.yaml']
349+ # --- Phi-4 variants (2025) ---
350+ - name : microsoft/Phi-4-multimodal-instruct
351+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
352+ - name : microsoft/Phi-4-reasoning-vision-15B
353+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
354+ # --- MiniMax M2 (2025) ---
355+ - name : MiniMaxAI/MiniMax-M2
356+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
357+ # --- Tencent Hunyuan small (2025) ---
358+ - name : tencent/Hunyuan-1.8B-Instruct
359+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
360+ - name : tencent/Hunyuan-MT-7B
361+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
362+ # --- UI-TARS (2025) - GUI agent VLM ---
363+ - name : ByteDance-Seed/UI-TARS-1.5-7B
364+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
365+ # --- Nvidia Nemotron Flash (2025) ---
366+ - name : nvidia/Nemotron-Flash-3B-Instruct
367+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
368+ # --- InternLM3 (Jan 2025) ---
369+ - name : internlm/internlm3-8b-instruct
370+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
371+ # --- Gemma 3 missing sizes (Mar 2025) ---
372+ - name : google/gemma-3-4b-it
373+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
374+ - name : google/gemma-3-12b-it
375+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
376+ # --- Mistral Small (2025) ---
377+ - name : mistralai/Mistral-Small-24B-Instruct-2501
378+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
379+ - name : mistralai/Mistral-Small-3.1-24B-Instruct-2503
380+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
381+ # --- DeepSeek R1 distills (Jan 2025) ---
382+ - name : deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
383+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
384+ - name : deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
385+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
386+ - name : deepseek-ai/DeepSeek-R1-Distill-Llama-8B
387+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
388+ # --- DeepSeek Prover V2 671B (2025) ---
389+ - name : deepseek-ai/DeepSeek-Prover-V2-671B
390+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
391+ # --- OLMo 2 (Mar 2025) ---
392+ - name : allenai/OLMo-2-0325-32B-Instruct
393+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
394+ - name : allenai/OLMo-2-0325-32B-DPO
395+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
396+ # --- Command A variants (2025) ---
397+ - name : CohereLabs/command-a-translate-08-2025
398+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
399+ - name : CohereLabs/command-a-reasoning-08-2025
400+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
401+ # --- Falcon3 (Dec 2024) ---
402+ - name : tiiuae/Falcon3-1B-Instruct
403+ yaml_extra : ['dashboard_default.yaml', 'world_size_1.yaml']
404+ - name : tiiuae/Falcon3-10B-Instruct
405+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
406+ # --- EXAONE 3.5 (Dec 2024) ---
407+ - name : LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct
408+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
409+ - name : LGAI-EXAONE/EXAONE-3.5-32B-Instruct
410+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
411+ # --- SmolLM2 (Nov 2024) ---
412+ - name : HuggingFaceTB/SmolLM2-135M-Instruct
413+ yaml_extra : ['dashboard_default.yaml', 'world_size_1.yaml']
414+ - name : HuggingFaceTB/SmolLM2-1.7B-Instruct
415+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
416+ # --- Qwen2.5-Coder (Nov 2024) ---
417+ - name : Qwen/Qwen2.5-Coder-1.5B-Instruct
418+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
419+ - name : Qwen/Qwen2.5-Coder-32B-Instruct
420+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
421+ # --- OLMo 3 Think (Nov 2025) ---
422+ - name : allenai/Olmo-3-32B-Think
423+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
424+ # --- Qwen3-14B (May 2025) ---
425+ - name : Qwen/Qwen3-14B
426+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
427+ # --- DeepSeek V3 (Dec 2024) ---
428+ - name : deepseek-ai/DeepSeek-V3
429+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'num_hidden_layers_5.yaml']
430+ # --- Qwen2.5 larger sizes ---
431+ - name : Qwen/Qwen2.5-14B-Instruct
432+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
433+ - name : Qwen/Qwen2.5-72B-Instruct
434+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
435+ # --- Qwen2.5-Math ---
436+ - name : Qwen/Qwen2.5-Math-7B-Instruct
437+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
438+ # --- Nvidia Nemotron Nano (2025) - Nemotron 3 Nano and Nano v2 ---
439+ - name : nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
440+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml', 'nano_v3.yaml']
441+ - name : nvidia/NVIDIA-Nemotron-Nano-12B-v2
442+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
443+ # --- Perplexity R1-1776 distill Qwen (2025) ---
444+ - name : perplexity-ai/r1-1776-distill-qwen-32b
445+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
446+ # --- Nanbeige 8B (2025) ---
447+ - name : Nanbeige/Nanbeige4.1-8B
448+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
449+ # --- Qwen3 updates (2025) - MoE 2507 refresh and 0.6B FP8 checkpoint ---
450+ - name : Qwen/Qwen3-30B-A3B-Instruct-2507
451+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'simple_shard_only.yaml']
452+ - name : Qwen/Qwen3-0.6B-FP8
453+ yaml_extra : ['dashboard_default.yaml', 'world_size_1.yaml']
454+ # --- Mistral updates (2025) ---
455+ - name : mistralai/Codestral-25.01
456+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml']
457+ - name : mistralai/Mistral-Large-Instruct-2501
458+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml']
459+ # --- Qwen3-VL 2B (2025) ---
460+ - name : Qwen/Qwen3-VL-2B-Instruct
461+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
462+ # --- Granite 4.0 tiny (2025) ---
463+ - name : ibm-granite/granite-4.0-tiny-preview
464+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
465+ # --- OLMo 3 Think 7B (2025) ---
466+ - name : allenai/Olmo-3-7B-Think
467+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
468+ # --- Nemotron-H reasoning 8B (2025) ---
469+ - name : nvidia/Nemotron-H-8B-Reasoning-128K
470+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
471+ # --- Pixtral (Sep 2024) - VLM ---
472+ - name : mistralai/Pixtral-12B-2409
473+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
474+ # --- DeepSeek Coder V2 Lite (2024) ---
475+ - name : deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct
476+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
477+ # --- Seed-Coder reasoning (2025) ---
478+ - name : ByteDance-Seed/Seed-Coder-8B-Reasoning
479+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
480+ # --- Tencent Hunyuan translation (2025) ---
481+ - name : tencent/Hunyuan-MT-Chimera-7B
482+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml']
483+ # --- Phi-4-mini flash reasoning (2025) ---
484+ - name : microsoft/Phi-4-mini-flash-reasoning
485+ yaml_extra : ['dashboard_default.yaml', 'world_size_1.yaml']
486+ # --- Qwen2.5-VL (2025) - top VLM family ---
487+ - name : Qwen/Qwen2.5-VL-7B-Instruct
488+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
489+ - name : Qwen/Qwen2.5-VL-72B-Instruct
490+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
491+ # --- Qwen3-VL MoE (2025) - flagship VLM ---
492+ - name : Qwen/Qwen3-VL-30B-A3B-Instruct
493+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml', 'qwen3_vl.yaml']
494+ # --- InternVL3 (Apr 2025) - open-source VLM ---
495+ - name : OpenGVLab/InternVL3-8B
496+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
497+ - name : OpenGVLab/InternVL3-78B
498+ yaml_extra : ['dashboard_default.yaml', 'world_size_8.yaml', 'multimodal.yaml']
499+ # --- InternVL3.5 (2025) - latest gen ---
500+ - name : OpenGVLab/InternVL3_5-8B
501+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
502+ # --- SmolVLM2 (2025) - tiny VLM ---
503+ - name : HuggingFaceTB/SmolVLM2-2.2B-Instruct
504+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
505+ # --- Molmo2 (2025) - fully open VLM ---
506+ - name : allenai/Molmo2-8B
507+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
508+ # --- DeepSeek-VL2 (Dec 2024) - MoE VLM ---
509+ - name : deepseek-ai/deepseek-vl2-small
510+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
511+ # --- Aya Vision (2025) - multilingual VLM ---
512+ - name : CohereLabs/aya-vision-8b
513+ yaml_extra : ['dashboard_default.yaml', 'world_size_2.yaml', 'multimodal.yaml']
514+ - name : CohereLabs/aya-vision-32b
515+ yaml_extra : ['dashboard_default.yaml', 'world_size_4.yaml', 'multimodal.yaml']
0 commit comments