grpo-nanov3-30BA3B-2n8g-fsdp2-lora.yaml (forked from NVIDIA-NeMo/RL)

defaults: ../../grpo_math_1B.yaml
grpo:
  num_prompts_per_step: 2
  num_generations_per_prompt: 8
checkpointing:
  checkpoint_dir: results/grpo-nanov3-30BA3B-2n8g-fsdp2-lora
policy:
  model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  tokenizer:
    name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
  train_global_batch_size: 16
  train_micro_batch_size: 1
  logprob_batch_size: 1
  max_total_sequence_length: 2048
  dtensor_cfg:
    lora_cfg:
      enabled: true
      dim: 128
      alpha: 512
      exclude_modules: ['*out_proj*'] # Exclude all out_proj modules. When NemotronHMamba2Mixer uses cuda_kernels_forward, out_proj LoRA has no gradient.
      match_all_linear: false
      use_triton: false
  sequence_packing:
    enabled: false
  generation:
    vllm_cfg:
      tensor_parallel_size: 4
      gpu_memory_utilization: 0.7
logger:
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-nanov3-30BA3B-2n8g-fsdp2-lora
cluster:
  gpus_per_node: 8
  num_nodes: 2
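
The `defaults:` entry points this recipe at the shared `grpo_math_1B.yaml` base config; the keys above act as overrides layered on top of it, and everything not listed is inherited from the base. Note the arithmetic: 2 prompts per step x 8 generations per prompt gives 16 rollouts per step, the same figure as `train_global_batch_size: 16`, and the cluster block provides 2 nodes x 8 GPUs = 16 GPUs. A minimal sketch of this kind of defaults-over-base layering with OmegaConf follows; the file paths and the handling of the `defaults` key are illustrative assumptions, not the exact NeMo RL loader.

```python
# Minimal sketch of defaults-style config layering with OmegaConf.
# Assumptions: the base config path and the manual removal of the `defaults`
# key are for illustration only; the actual NeMo RL loader may differ.
from omegaconf import OmegaConf

base = OmegaConf.load("examples/configs/grpo_math_1B.yaml")           # assumed base path
override = OmegaConf.load("grpo-nanov3-30BA3B-2n8g-fsdp2-lora.yaml")   # this file
override.pop("defaults", None)           # drop the pointer itself before merging
cfg = OmegaConf.merge(base, override)    # override values win, e.g. lora_cfg.enabled: true

print(cfg.policy.dtensor_cfg.lora_cfg.dim)   # 128
print(cfg.grpo.num_prompts_per_step)         # 2
```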
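
On the LoRA block: `dim: 128` with `alpha: 512` corresponds to a scaling factor of alpha/dim = 4 under the standard LoRA parameterization, and the `exclude_modules` patterns appear to be shell-style globs matched against module names, so `'*out_proj*'` skips the Mamba mixer's `out_proj` (whose LoRA weights would receive no gradient when `NemotronHMamba2Mixer` uses `cuda_kernels_forward`, per the inline comment). Below is a hypothetical, self-contained sketch of that matching logic; the helper and the module names are made up and are not the NeMo RL implementation.

```python
# Hypothetical illustration of glob-based LoRA target selection; the helper and
# the example module names are invented, not actual NeMo RL code.
from fnmatch import fnmatch

EXCLUDE_MODULES = ["*out_proj*"]   # mirrors dtensor_cfg.lora_cfg.exclude_modules

def is_lora_target(module_name: str, exclude=EXCLUDE_MODULES) -> bool:
    """Return True if a linear module should receive a LoRA adapter."""
    return not any(fnmatch(module_name, pattern) for pattern in exclude)

candidates = [
    "backbone.layers.0.mixer.in_proj",
    "backbone.layers.0.mixer.out_proj",   # excluded: no gradient via cuda_kernels_forward
    "backbone.layers.1.mlp.up_proj",
]
print([name for name in candidates if is_lora_target(name)])
# ['backbone.layers.0.mixer.in_proj', 'backbone.layers.1.mlp.up_proj']
```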