grpo-nanov3-30BA3B-2n8g-fsdp2-lora.yaml (forked from NVIDIA-NeMo/RL)

defaults: ../../grpo_math_1B.yaml
grpo:
  num_prompts_per_step: 2
  num_generations_per_prompt: 8
checkpointing:
  checkpoint_dir: results/grpo-nanov3-30BA3B-2n8g-fsdp2-lora
policy:
  model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  tokenizer:
    name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
  train_global_batch_size: 16
  train_micro_batch_size: 1
  logprob_batch_size: 1
  max_total_sequence_length: 2048
  dtensor_cfg:
    lora_cfg:
      enabled: true
      dim: 128
      alpha: 512
      exclude_modules: ['*out_proj*'] # Exclude all out_proj modules. When NemotronHMamba2Mixer uses cuda_kernels_forward, out_proj LoRA has no gradient.
      match_all_linear: false
      use_triton: false
  sequence_packing:
    enabled: false
  generation:
    vllm_cfg:
      tensor_parallel_size: 4
      gpu_memory_utilization: 0.7
logger:
  wandb_enabled: true
  tensorboard_enabled: true
  wandb:
    project: nemo-rl
    name: grpo-nanov3-30BA3B-2n8g-fsdp2-lora
cluster:
  gpus_per_node: 8
  num_nodes: 2
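
The `defaults:` entry points this recipe at the shared `grpo_math_1B.yaml` base config; the keys above act as overrides layered on top of it, and everything not listed is inherited from the base. Note the arithmetic: 2 prompts per step x 8 generations per prompt gives 16 rollouts per step, the same figure as `train_global_batch_size: 16`, and the cluster block provides 2 nodes x 8 GPUs = 16 GPUs. A minimal sketch of this kind of defaults-over-base layering with OmegaConf follows; the file paths and the handling of the `defaults` key are illustrative assumptions, not the exact NeMo RL loader.

```python
# Minimal sketch of defaults-style config layering with OmegaConf.
# Assumptions: the base config path and the manual removal of the `defaults`
# key are for illustration only; the actual NeMo RL loader may differ.
from omegaconf import OmegaConf

base = OmegaConf.load("examples/configs/grpo_math_1B.yaml")           # assumed base path
override = OmegaConf.load("grpo-nanov3-30BA3B-2n8g-fsdp2-lora.yaml")   # this file
override.pop("defaults", None)           # drop the pointer itself before merging
cfg = OmegaConf.merge(base, override)    # override values win, e.g. lora_cfg.enabled: true

print(cfg.policy.dtensor_cfg.lora_cfg.dim)   # 128
print(cfg.grpo.num_prompts_per_step)         # 2
```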
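
On the LoRA block: `dim: 128` with `alpha: 512` corresponds to a scaling factor of alpha/dim = 4 under the standard LoRA parameterization, and the `exclude_modules` patterns appear to be shell-style globs matched against module names, so `'*out_proj*'` skips the Mamba mixer's `out_proj` (whose LoRA weights would receive no gradient when `NemotronHMamba2Mixer` uses `cuda_kernels_forward`, per the inline comment). Below is a hypothetical, self-contained sketch of that matching logic; the helper and the module names are made up and are not the NeMo RL implementation.

```python
# Hypothetical illustration of glob-based LoRA target selection; the helper and
# the example module names are invented, not actual NeMo RL code.
from fnmatch import fnmatch

EXCLUDE_MODULES = ["*out_proj*"]   # mirrors dtensor_cfg.lora_cfg.exclude_modules

def is_lora_target(module_name: str, exclude=EXCLUDE_MODULES) -> bool:
    """Return True if a linear module should receive a LoRA adapter."""
    return not any(fnmatch(module_name, pattern) for pattern in exclude)

candidates = [
    "backbone.layers.0.mixer.in_proj",
    "backbone.layers.0.mixer.out_proj",   # excluded: no gradient via cuda_kernels_forward
    "backbone.layers.1.mlp.up_proj",
]
print([name for name in candidates if is_lora_target(name)])
# ['backbone.layers.0.mixer.in_proj', 'backbone.layers.1.mlp.up_proj']
```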