-
Notifications
You must be signed in to change notification settings - Fork 349
Expand file tree
/
Copy pathgrpo-qwen3-30ba3b-4n8g-40K.yaml
More file actions
49 lines (49 loc) · 1.25 KB
/
grpo-qwen3-30ba3b-4n8g-40K.yaml
File metadata and controls
49 lines (49 loc) · 1.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# GRPO training recipe for Qwen/Qwen3-30B-A3B (MoE) at 40K sequence length
# on a 4-node x 8-GPU cluster. Keys here override the base config referenced
# by `defaults` (presumably resolved by the consuming launcher — confirm).
defaults: ../../../grpo_math_1B.yaml
grpo:
  num_prompts_per_step: 64
  num_generations_per_prompt: 32
checkpointing:
  enabled: false
  checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g
policy:
  model_name: Qwen/Qwen3-30B-A3B
  train_micro_batch_size: 1
  logprob_batch_size: 4
  max_total_sequence_length: 40960  # 40K context window
  dtensor_cfg:
    enabled: false  # DTensor backend off; Megatron backend below is used instead
  # Nulled so the Megatron optimizer/scheduler settings under megatron_cfg apply.
  optimizer: null
  scheduler: null
  # Sequence length must be divisible by TP * (2 * CP), i.e.
  # tensor_model_parallel_size * 2 * context_parallel_size.
  # Kept on one line: splitting the ${mul:...} interpolation across lines
  # creates a folded multi-line plain scalar, an easy-to-miss YAML trap.
  make_sequence_length_divisible_by: ${mul:${policy.megatron_cfg.tensor_model_parallel_size}, ${mul:2, ${policy.megatron_cfg.context_parallel_size}}}
  megatron_cfg:
    enabled: true
    empty_unused_memory_level: 1
    tensor_model_parallel_size: 4
    pipeline_model_parallel_size: 1
    expert_model_parallel_size: 8  # MoE expert sharding across 8 ranks
    sequence_parallel: true
    context_parallel_size: 8
    moe_grouped_gemm: true
    optimizer:
      lr: 3.0e-07
      min_lr: 3.0e-08
    scheduler:
      lr_warmup_iters: 50
      lr_warmup_init: 3.0e-08
  env_vars:
    # NOTE(review): expandable segments explicitly disabled — presumably an
    # allocator workaround for this workload; confirm before changing.
    # Value is a single plain-scalar string (no space after the inner colon).
    PYTORCH_CUDA_ALLOC_CONF: expandable_segments:False
  activation_checkpointing: true
generation:
  vllm_cfg:
    tensor_parallel_size: 2  # vLLM TP for rollout generation (distinct from training TP above)
logger:
  log_dir: logs/grpo-qwen3-30ba3b-4n8g
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-qwen3-30ba3b-4n8g
cluster:
  gpus_per_node: 8
  num_nodes: 4