
[DO NOT MERGE] SFT configs for Qwen coder models #438

Draft
wants to merge 23 commits into main
53 changes: 53 additions & 0 deletions recipes/QwQ-32B/sft/config_v00.00.yaml
@@ -0,0 +1,53 @@
# Config for 16 nodes to go fast
# Model arguments
model_name_or_path: Qwen/QwQ-32B
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 1

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_model_id: open-r1/QwQ-32B-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/QwQ-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
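
A quick sanity check on the "16 nodes to go fast" comment above: with per_device_train_batch_size: 1 and gradient_accumulation_steps: 1, the global batch size is simply the world size. A minimal sketch, assuming 8 GPUs per node (an assumption; the node shape is not stated in the config):

# Hypothetical back-of-the-envelope for the global batch size of this run.
nodes = 16
gpus_per_node = 8            # assumption: typical 8-GPU node, not in the config
per_device_train_batch_size = 1
gradient_accumulation_steps = 1

global_batch_size = (
    nodes * gpus_per_node * per_device_train_batch_size * gradient_accumulation_steps
)
print(global_batch_size)     # 128 sequences of up to max_length=32768 tokens per step
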
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v00.00.yaml
@@ -0,0 +1,57 @@
# Config for 16 nodes to go fast
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
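
New in this recipe relative to the QwQ one: optim: paged_adamw_8bit together with save_only_model. Rough arithmetic for why an 8-bit, paged AdamW matters at 32B scale (approximate, and assumes the two Adam moment buffers dominate optimizer memory):

# Approximate optimizer-state footprint for a 32B-parameter model.
params = 32e9
fp32_adamw_states = params * 8   # two fp32 moments (4 bytes each): ~256 GB
int8_adamw_states = params * 2   # two 8-bit moments (1 byte each):  ~64 GB
print(f"fp32 AdamW states: ~{fp32_adamw_states / 1e9:.0f} GB")
print(f"8-bit AdamW states: ~{int8_adamw_states / 1e9:.0f} GB")
# "Paged" means bitsandbytes can additionally spill these states to CPU memory
# under GPU memory pressure, which is what the save_only_model comment above
# has to work around at checkpoint time.
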
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v01.00.yaml
@@ -0,0 +1,57 @@
# Config for 16 nodes to go fast
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-ioi-cots-mix
dataset_config: solutions_stop_compile
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v01.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v01.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
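
All of these recipes share lr_scheduler_type: cosine_with_min_lr with min_lr_rate: 0.1, so the learning rate decays to a 10% floor rather than to zero. A minimal sketch of the shape (it mirrors the behavior of the Transformers scheduler, not its exact implementation):

import math

def lr_at(step, total_steps, peak_lr=4.0e-05, min_lr_rate=0.1, warmup_ratio=0.03):
    # Linear warmup to peak_lr, then cosine decay to min_lr_rate * peak_lr.
    warmup_steps = max(1, int(total_steps * warmup_ratio))
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    cosine = 0.5 * (1.0 + math.cos(math.pi * progress))   # decays 1 -> 0
    return peak_lr * (min_lr_rate + (1.0 - min_lr_rate) * cosine)

print(lr_at(30, 1000))    # ~4.0e-05 at the end of warmup
print(lr_at(1000, 1000))  # 4.0e-06 floor, i.e. 0.1 * peak
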
56 changes: 56 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v02.00.yaml
@@ -0,0 +1,56 @@
# Config for 4 nodes since it's only 10k samples
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/ioi-cots
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v02.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v02.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
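
For context on how a flat recipe like this is consumed: open-r1-style SFT scripts typically load it with TRL's TrlParser, which maps the YAML keys onto trainer and model dataclasses. A hypothetical sketch; note that keys such as callbacks, benchmarks, and hub_model_revision come from open-r1's extended config dataclasses, so the stock TRL classes below are a simplification:

from trl import ModelConfig, ScriptArguments, SFTConfig, TrlParser

# dataset_* keys land in ScriptArguments, model_* keys in ModelConfig,
# and the remaining trainer keys in SFTConfig.
parser = TrlParser((ScriptArguments, SFTConfig, ModelConfig))
script_args, training_args, model_args = parser.parse_args_and_config()
# Invoked as, e.g.:
#   python sft.py --config recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
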
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
@@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v03.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v03.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v04.00.yaml
@@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile_subtask_score_gt_0
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v04.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v04.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
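
The last two recipes differ only in which filtered subset of the IOI CoTs they train on (stop_compile vs. stop_compile_subtask_score_gt_0). A quick hedged way to compare subset sizes; the dataset and config names come from the YAML above, while the "train" split name is an assumption:

from datasets import load_dataset

# Config names taken from the recipes above; "train" split is assumed.
for config in ("stop_compile", "stop_compile_subtask_score_gt_0"):
    ds = load_dataset("open-r1/ioi-cots-filtered", config, split="train")
    print(config, len(ds))
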