|
1 | | -# EAGLE3 speculative-decoding training recipe. Override fields via OmegaConf dotlist on the CLI. |
| 1 | +# EAGLE3 speculative-decoding training recipe. Override fields via OmegaConf dotlist on the CLI |
| 2 | +# or by importing this file from a per-model recipe in modelopt_recipes/models/. |
2 | 3 |
|
3 | 4 | metadata: |
4 | 5 | recipe_type: speculative_eagle |
5 | 6 | description: EAGLE3 training recipe (model/data/training/eagle bundled). |
6 | 7 |
|
7 | | -# maps to ModelArguments (main.py) |
| 8 | +imports: |
| 9 | + eagle_default: configs/speculative_decoding/eagle/default |
| 10 | + eagle_training_default: configs/speculative_decoding/eagle/training_default |
| 11 | + |
| 12 | +# maps to ModelArguments |
8 | 13 | model: |
9 | 14 | model_name_or_path: |
10 | 15 | trust_remote_code: false |
11 | 16 | use_fake_base_for_offline: false |
12 | 17 |
|
13 | | -# maps to DataArguments (main.py) |
| 18 | +# maps to DataArguments |
14 | 19 | data: |
15 | 20 | data_path: input_conversations/train.jsonl |
16 | 21 | offline_data_path: |
17 | 22 | draft_vocab_cache: |
18 | 23 | vlm_img_dir: |
19 | 24 | vlm_processor: |
20 | 25 |
|
21 | | -# maps to TrainingArguments (main.py) |
| 26 | +# maps to TrainingArguments |
22 | 27 | training: |
23 | | - # --- commonly modified --- |
24 | | - output_dir: |
25 | | - num_train_epochs: 1 |
26 | | - per_device_train_batch_size: 1 |
27 | | - learning_rate: 1.0e-4 |
28 | | - warmup_steps: 1000 |
29 | | - training_seq_len: 2048 |
30 | | - logging_steps: 100 |
31 | | - save_steps: 8192 |
32 | | - cp_size: 1 |
33 | | - disable_tqdm: false |
34 | | - estimate_ar: false |
35 | | - ar_validate_steps: -1 |
36 | | - answer_only_loss: false |
37 | | - |
38 | | - # --- rarely modified --- |
39 | | - do_eval: false |
40 | | - lr_scheduler_type: linear |
41 | | - save_strategy: steps |
42 | | - weight_decay: 0.0 |
43 | | - dataloader_drop_last: true |
44 | | - bf16: true |
45 | | - tf32: true |
46 | | - remove_unused_columns: false |
| 28 | + $import: eagle_training_default |
47 | 29 |
|
48 | 30 | # maps to EagleConfig (modelopt/torch/speculative/config.py). |
49 | 31 | eagle: |
50 | | - # eagle_offline is derived from data.offline_data_path; do not set here. |
51 | | - eagle_decoder_type: llama |
52 | | - eagle_ttt_steps: 3 |
53 | | - eagle_mix_hidden_states: false |
54 | | - eagle_use_torch_compile: true |
55 | | - eagle_self_logit_distillation: true |
56 | | - eagle_freeze_base_model: true |
57 | | - eagle_loss_decay_factor: 0.9 |
58 | | - eagle_hidden_state_distillation: false |
59 | | - eagle_reuse_base_decoder: false |
60 | | - eagle_report_acc: true |
61 | | - eagle_enable_nvtx: false |
62 | | - # Rope scaling: disable during training (default_config.py uses rope_type=default), |
63 | | - # inject YaRN during export for long-context inference. |
64 | | - eagle_export_rope_scaling: |
65 | | - rope_type: yarn |
66 | | - factor: 32.0 |
67 | | - original_max_position_embeddings: 2048 |
68 | | - # overwrite to modelopt/torch/speculative/eagle/default_config.py |
69 | | - eagle_architecture_config: {} |
| 32 | + $import: eagle_default |
0 commit comments