
[DO NOT MERGE] SFT configs for Qwen coder models #438

Draft
wants to merge 23 commits into main
53 changes: 53 additions & 0 deletions recipes/QwQ-32B/sft/config_v00.00.yaml
@@ -0,0 +1,53 @@
# Config for 16 nodes to go fast
# Model arguments
model_name_or_path: Qwen/QwQ-32B
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 1

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_model_id: open-r1/QwQ-32B-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/QwQ-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
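
A quick sanity check on the "16 nodes to go fast" comment above: with per_device_train_batch_size: 1 and gradient_accumulation_steps: 1, the global batch size is simply the world size. A minimal sketch, assuming 8 GPUs per node (an assumption; the node shape is not stated in the config):

# Hypothetical back-of-the-envelope for the global batch size of this run.
nodes = 16
gpus_per_node = 8            # assumption: typical 8-GPU node, not in the config
per_device_train_batch_size = 1
gradient_accumulation_steps = 1

global_batch_size = (
    nodes * gpus_per_node * per_device_train_batch_size * gradient_accumulation_steps
)
print(global_batch_size)     # 128 sequences of up to max_length=32768 tokens per step
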
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v00.00.yaml
@@ -0,0 +1,57 @@
# Config for 16 nodes to go fast
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
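
New in this recipe relative to the QwQ one: optim: paged_adamw_8bit together with save_only_model. Rough arithmetic for why an 8-bit, paged AdamW matters at 32B scale (approximate, and assumes the two Adam moment buffers dominate optimizer memory):

# Approximate optimizer-state footprint for a 32B-parameter model.
params = 32e9
fp32_adamw_states = params * 8   # two fp32 moments (4 bytes each): ~256 GB
int8_adamw_states = params * 2   # two 8-bit moments (1 byte each):  ~64 GB
print(f"fp32 AdamW states: ~{fp32_adamw_states / 1e9:.0f} GB")
print(f"8-bit AdamW states: ~{int8_adamw_states / 1e9:.0f} GB")
# "Paged" means bitsandbytes can additionally spill these states to CPU memory
# under GPU memory pressure, which is what the save_only_model comment above
# has to work around at checkpoint time.
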
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v01.00.yaml
@@ -0,0 +1,57 @@
# Config for 16 nodes to go fast
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-ioi-cots-mix
dataset_config: solutions_stop_compile
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v01.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v01.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
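
All of these recipes share lr_scheduler_type: cosine_with_min_lr with min_lr_rate: 0.1, so the learning rate decays to a 10% floor rather than to zero. A minimal sketch of the shape (it mirrors the behavior of the Transformers scheduler, not its exact implementation):

import math

def lr_at(step, total_steps, peak_lr=4.0e-05, min_lr_rate=0.1, warmup_ratio=0.03):
    # Linear warmup to peak_lr, then cosine decay to min_lr_rate * peak_lr.
    warmup_steps = max(1, int(total_steps * warmup_ratio))
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    cosine = 0.5 * (1.0 + math.cos(math.pi * progress))   # decays 1 -> 0
    return peak_lr * (min_lr_rate + (1.0 - min_lr_rate) * cosine)

print(lr_at(30, 1000))    # ~4.0e-05 at the end of warmup
print(lr_at(1000, 1000))  # 4.0e-06 floor, i.e. 0.1 * peak
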
56 changes: 56 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v02.00.yaml
@@ -0,0 +1,56 @@
# Config for 4 nodes since it's only 10k samples
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/ioi-cots
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v02.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v02.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
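
For context on how a flat recipe like this is consumed: open-r1-style SFT scripts typically load it with TRL's TrlParser, which maps the YAML keys onto trainer and model dataclasses. A hypothetical sketch; note that keys such as callbacks, benchmarks, and hub_model_revision come from open-r1's extended config dataclasses, so the stock TRL classes below are a simplification:

from trl import ModelConfig, ScriptArguments, SFTConfig, TrlParser

# dataset_* keys land in ScriptArguments, model_* keys in ModelConfig,
# and the remaining trainer keys in SFTConfig.
parser = TrlParser((ScriptArguments, SFTConfig, ModelConfig))
script_args, training_args, model_args = parser.parse_args_and_config()
# Invoked as, e.g.:
#   python sft.py --config recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
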
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
@@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v03.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v03.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
57 changes: 57 additions & 0 deletions recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v04.00.yaml
@@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile_subtask_score_gt_0
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v04.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v04.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03
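
The last two recipes differ only in which filtered subset of the IOI CoTs they train on (stop_compile vs. stop_compile_subtask_score_gt_0). A quick hedged way to compare subset sizes; the dataset and config names come from the YAML above, while the "train" split name is an assumption:

from datasets import load_dataset

# Config names taken from the recipes above; "train" split is assumed.
for config in ("stop_compile", "stop_compile_subtask_score_gt_0"):
    ds = load_dataset("open-r1/ioi-cots-filtered", config, split="train")
    print(config, len(ds))
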