File tree Expand file tree Collapse file tree
# tools/launcher/examples/Qwen/moonshotai/Kimi-K2.5-DFlash
#
# Step3 offline EAGLE3 draft-head training for moonshotai/Kimi-K2.5-DFlash.
#
# Standalone task extracted from the 4-task hf_offline_eagle3 pipeline.
# Reads hidden states produced by step2 from /scratchspace/offline_hidden_states.
#
# Usage:
#   uv run slurm.py --yaml modules/Model-Optimizer/tools/launcher/examples/Qwen/moonshotai/Kimi-K2.5-DFlash/step3_train.yaml --yes

# Launcher job definition: a single-task pipeline that runs the EAGLE3
# training script on one 8-GPU node.
# NOTE(review): the nesting below was reconstructed from a view that had lost
# its indentation — confirm that global_vars and task_0 belong under
# `pipeline` before merging.
job_name: moonshotai/Kimi-K2.5-DFlash_EAGLE3_train
pipeline:
  allow_to_fail: false
  skip: false
  note: null

  global_vars:
    # Referenced in task args as <<global_vars.hf_model>>.
    hf_model: /hf-local/Qwen/moonshotai/Kimi-K2.5-DFlash

  task_0:
    script: common/eagle3/train_eagle.sh
    # Each entry is passed to the script as one argument string; the first
    # selects the recipe file, the rest are dotted key=value overrides.
    args:
      - --config modules/Model-Optimizer/modelopt_recipes/general/speculative_decoding/eagle3.yaml
      - model.model_name_or_path=<<global_vars.hf_model>>
      - data.offline_data_path=/scratchspace/offline_hidden_states
      - training.output_dir=/scratchspace/eagle3
      - training.training_seq_len=4096
      - training.disable_tqdm=true
      - training.ar_validate_steps=500000
    slurm_config:
      # No leading space inside the quotes: the value must be exactly the
      # factory name.
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 8
      container: nvcr.io/nvidia/tensorrt-llm/release:1.2.0
You can’t perform that action at this time.
0 commit comments