
Commit f6f005c

Merge branch 'PaddlePaddle:develop' into feat/qwen2_5_vl_add_20251118

2 parents 99beb35 + a483fb1

37 files changed: +512 −96 lines

examples/config/dpo/full.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: erniekit
 eval_dataset_type: erniekit
-train_dataset_path: ./data/dpo/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/dpo/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/dpo/dev.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/dpo/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 packing: false
@@ -31,7 +31,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts
+output_dir: ./checkpoints/qwen3-dpo-full
 disable_tqdm: true
 eval_accumulation_steps: 16
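For reference, the effective batch size these training settings imply, as a minimal Python sketch. per_device_train_batch_size and gradient_accumulation_steps come from the config above; the data-parallel rank count is not fixed by the config itself, so it is a free parameter here:

    # Batch-size arithmetic implied by the config above (a sketch).
    def global_batch_size(dp_ranks: int) -> int:
        per_device_train_batch_size = 1   # from the config
        gradient_accumulation_steps = 4   # from the config
        return per_device_train_batch_size * gradient_accumulation_steps * dp_ranks

    print(global_batch_size(8))  # 32 sequences per optimizer step on 8 DP ranks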

examples/config/dpo/full_function_call.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: chatml
 eval_dataset_type: chatml
-train_dataset_path: ./data/dpo_fc/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/function-call/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/dpo_fc/test.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/function-call/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 packing: false
@@ -33,7 +33,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_fc_ckpts
+output_dir: ./checkpoints/qwen3-dpo-full-fc
 disable_tqdm: true
 eval_accumulation_steps: 16

examples/config/dpo/full_tp_pp.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: erniekit
 eval_dataset_type: erniekit
-train_dataset_path: ./data/dpo/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/dpo/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/dpo/dev.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/dpo/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 num_samples_each_epoch: 6000000
@@ -32,7 +32,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts_parallel
+output_dir: ./checkpoints/qwen3-dpo-full-tp-pp
 disable_tqdm: true
 eval_accumulation_steps: 16

Lines changed: 59 additions & 0 deletions (new file)

@@ -0,0 +1,59 @@
+### data
+train_dataset_type: erniekit
+eval_dataset_type: erniekit
+train_dataset_path: ./tests/fixtures/dummy/dpo/train.jsonl
+train_dataset_prob: "1.0"
+eval_dataset_path: ./tests/fixtures/dummy/dpo/eval.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 8192
+num_samples_each_epoch: 6000000
+packing: true
+mix_strategy: concat
+
+### model
+model_name_or_path: Qwen/Qwen3-0.6B-Base
+attn_impl: flashmask
+
+### finetuning
+# base
+stage: DPO
+fine_tuning: full
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+num_train_epochs: 1
+max_steps: 10
+eval_steps: 100
+evaluation_strategy: steps
+save_steps: 100
+save_total_limit: 1
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 4
+logging_dir: ./vdl_log
+output_dir: ./checkpoints/qwen3-dpo-full-tp-pp-ep
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train
+warmup_steps: 20
+learning_rate: 1.0e-6
+
+# performance
+tensor_parallel_degree: 4
+pipeline_parallel_degree: 2
+pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv disable_batch_p2p_comm
+sequence_parallel: true
+sharding: stage1
+recompute: true
+bf16: true
+fp16_opt_level: O2
+unified_checkpoint: true
+use_expert_parallel: true
+expert_parallel_degree: 4
+sharding_parallel_config: "split_param"
+amp_master_grad: true
+tensor_parallel_config: enable_delay_scale_loss sync_param sync_grad
+unified_checkpoint_config: ignore_merge_optimizer
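
This new config combines tensor, pipeline, and expert parallelism. A sketch of the device arithmetic it implies; the assumption that expert-parallel groups are carved out of the data-parallel/sharding dimension follows common Paddle hybrid-parallel practice and is not stated in the diff:

    # Degrees taken from the new config above.
    tensor_parallel_degree = 4
    pipeline_parallel_degree = 2
    expert_parallel_degree = 4

    # One full model replica spans TP x PP ranks.
    ranks_per_replica = tensor_parallel_degree * pipeline_parallel_degree  # 8

    # Assumption: EP groups form inside the data-parallel/sharding dimension,
    # so the smallest valid world size is one replica per expert-parallel rank.
    min_world_size = ranks_per_replica * expert_parallel_degree  # 32

    print(ranks_per_replica, min_world_size)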

examples/config/dpo/lora.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: erniekit
 eval_dataset_type: erniekit
-train_dataset_path: ./data/dpo/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/dpo/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/dpo/dev.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/dpo/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 packing: false
@@ -33,7 +33,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_lora_ckpts
+output_dir: ./checkpoints/qwen3-dpo-lora
 disable_tqdm: true
 eval_accumulation_steps: 16

examples/config/dpo/lora_tp_pp.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: erniekit
 eval_dataset_type: erniekit
-train_dataset_path: ./data/dpo/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/dpo/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/dpo/dev.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/dpo/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 packing: true
@@ -33,7 +33,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_lora_ckpts_parallel
+output_dir: ./checkpoints/qwen3-dpo-lora-tp-pp
 disable_tqdm: true
 eval_accumulation_steps: 16

Lines changed: 56 additions & 0 deletions (new file)

@@ -0,0 +1,56 @@
+### data
+train_dataset_type: erniekit
+eval_dataset_type: erniekit
+train_dataset_path: ./tests/fixtures/dummy/dpo/train.jsonl
+train_dataset_prob: "1.0"
+eval_dataset_path: ./tests/fixtures/dummy/dpo/eval.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 8192
+packing: true
+mix_strategy: concat
+
+### model
+model_name_or_path: Qwen/Qwen3-0.6B-Base
+attn_impl: flashmask
+lora: true
+lora_rank: 8
+
+### finetuning
+# base
+stage: DPO
+fine_tuning: lora
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+num_train_epochs: 1
+max_steps: 10
+eval_steps: 100
+evaluation_strategy: steps
+save_steps: 100
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 4
+logging_dir: ./vdl_log
+output_dir: ./checkpoints/qwen3-dpo-lora-tp-pp-ep
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train
+warmup_steps: 20
+learning_rate: 1.0e-4
+
+# performance
+tensor_parallel_degree: 4
+pipeline_parallel_degree: 2
+expert_parallel_degree: 4
+use_expert_parallel: true
+sequence_parallel: true
+pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
+sharding: stage1
+recompute: true
+bf16: true
+fp16_opt_level: O2
+unified_checkpoint: true
+amp_master_grad: true
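
The LoRA variant trains with lora_rank: 8 and a higher learning rate than its full-finetuning counterpart (1.0e-4 vs 1.0e-6), since only the low-rank adapters are updated. A sketch of what that rank means for trainable parameters; the 1024-dimensional example matrix is illustrative and not taken from the model:

    def lora_trainable_params(d_in: int, d_out: int, rank: int = 8) -> int:
        # LoRA freezes the base weight W (d_out x d_in) and learns a
        # low-rank update B @ A, with A of shape (rank, d_in) and
        # B of shape (d_out, rank).
        return rank * d_in + d_out * rank

    # Illustrative only: a square 1024x1024 projection adapted at rank 8.
    print(lora_trainable_params(1024, 1024))  # 16384 trainable params per matrix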

examples/config/pt/full.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: erniekit
 eval_dataset_type: erniekit
-train_dataset_path: ./data/pt/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/pt/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/pt/eval.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/pt/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 mix_strategy: concat
@@ -30,7 +30,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_sft_ckpts
+output_dir: ./checkpoints/qwen3-pt-full
 disable_tqdm: true
 eval_accumulation_steps: 16

examples/config/pt/full_offline_data.yaml

Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_sft_ckpts
+output_dir: ./checkpoints/qwen3-pt-full-offline
 disable_tqdm: true
 eval_accumulation_steps: 16

examples/config/pt/full_tp_pp.yaml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 ### data
 train_dataset_type: erniekit
 eval_dataset_type: erniekit
-train_dataset_path: ./data/pt/train.jsonl
+train_dataset_path: ./tests/fixtures/dummy/pt/train.jsonl
 train_dataset_prob: "1.0"
-eval_dataset_path: ./data/pt/eval.jsonl
+eval_dataset_path: ./tests/fixtures/dummy/pt/eval.jsonl
 eval_dataset_prob: "1.0"
 max_seq_len: 8192
 mix_strategy: concat
@@ -30,7 +30,7 @@ save_strategy: steps
 logging_steps: 1
 gradient_accumulation_steps: 4
 logging_dir: ./vdl_log
-output_dir: ./checkpoints/qwen3_hf_0p6b_sft_ckpts_parallel
+output_dir: ./checkpoints/qwen3-pt-full-tp-pp
 disable_tqdm: true
 eval_accumulation_steps: 16
