Skip to content

Commit 48bf8aa

Browse files
committed
config
1 parent 21631ac commit 48bf8aa

25 files changed

+5551
-0
lines changed

scripts/partial_rollout/README.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
## Build Docker
2+
3+
```
4+
docker run -it \
5+
--device /dev/dri \
6+
--device /dev/kfd \
7+
-p 8265:8265 \
8+
--group-add video \
9+
--cap-add SYS_PTRACE \
10+
--security-opt seccomp=unconfined \
11+
--privileged \
12+
-v $HOME/.ssh:/root/.ssh \
13+
-v $HOME:$HOME \
14+
--shm-size 128G \
15+
--name slime_yuzhen \
16+
--ulimit memlock=-1 \
17+
--ulimit stack=67108864 \
18+
-w $PWD \
19+
yushengsuthu/slime:slime_ubuntu22.04_rocm6.3.4-patch-numa-patch_sglang0.4.9_megatron-core-patch_ray2.47.1_apex_torch-memory-saver0.0.8-patch
20+
/bin/bash
21+
```
22+
23+
## Clone Repo
24+
25+
```
26+
git clone https://github.com/zyzshishui/slime_.git
27+
```
28+
29+
## Setup Environment
30+
31+
```
32+
vim ~/.bashrc
33+
export PYTHONPATH=/workspace/Megatron-LM-amd_version
34+
export WANDB_API_KEY="<your-wandb-api-key>"  # use your own key; never commit real credentials
35+
36+
source ~/.bashrc
37+
```
38+
39+
## Download Models
40+
41+
```
42+
# Qwen3-4B
43+
huggingface-cli download Qwen/Qwen3-4B --local-dir /root/Qwen3-4B
44+
huggingface-cli download guapisolo/Qwen3-4B-torch --local-dir /root/Qwen3-4B_torch
45+
46+
# DeepSeek-R1-Distill-Qwen-1.5B
47+
huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local-dir /root/DeepSeek-R1-Distill-Qwen-1.5B
48+
huggingface-cli download zyzshishui0627/DeepSeek-R1-Distill-Qwen-1.5B_torch_dist --local-dir /root/DeepSeek-R1-Distill-Qwen-1.5B_torch
49+
```
50+
51+
## Download Data
52+
53+
```
54+
# dapo
55+
huggingface-cli download --repo-type dataset zhuzilin/dapo-math-17k --local-dir /root/dapo-math-17k
56+
57+
# DeepScaler
58+
huggingface-cli download --repo-type dataset zyzshishui0627/DeepScaleR-openai-format --local-dir /root/DeepScaleR
59+
60+
# aime(eval)
61+
huggingface-cli download --repo-type dataset zhuzilin/aime-2024 --local-dir /root/aime-2024
62+
```
63+
64+
## Run Script
65+
66+
For example:
67+
68+
```
69+
nohup bash scripts/partial_rollout/qwen/grpo/run-qwen3-4B-dapo.sh > qwen3-4B-dapo.out
70+
```
71+
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
#!/bin/bash

# GRPO training with partial rollout on AMD GPUs:
# DeepSeek-R1-Distill-Qwen-1.5B on dapo-math-17k, launched via ray + sglang.
#
# Usage:
#   bash scripts/partial_rollout/deepseek/dapo/run-qwen3-4B-dapo-partial.sh
#
# Required env: WANDB_API_KEY must be exported (set -u aborts otherwise).

#### clear before training
# NOTE: this section runs before `set -e` on purpose — pkill exits non-zero
# when no matching process exists, and that must not abort the script.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python


set -euxo pipefail


### AMD Support ###
SLIME_DIR="/root/slime" # Need to change to your own path
export SLIME_DIR

MODEL_DIR="/root" # Need to change to your own path
export MODEL_DIR

DATA_DIR="/root" # Need to change to your own path
export DATA_DIR

# For AMD GPU
export RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=${RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES:-"1"} # Must set to 1
export HIP_VISIBLE_DEVICES=${HIP_VISIBLE_DEVICES:-"0,1,2,3,4,5,6,7"} # You can choose which gpus to use
####################


# ### AMD Support ### (If you have not installed these, install them here)
# # Clone and install Megatron-LM-amd_version
# export MAX_JOBS=512
# cd $SLIME_DIR
# pip uninstall megatron-core -y
# if [ ! -d "Megatron-LM-amd_version" ]; then
#     git clone git@github.com:yushengsu-thu/Megatron-LM-amd_version.git
# else
#     echo "Megatron-LM-amd_version directory already exists, skipping clone"
# fi
# cd Megatron-LM-amd_version
# pip install -vvv -e .
# cd $SLIME_DIR

# # Install slime
# pip install -e .
# ####################


# Prevent python from buffering stdout/stderr so ray streams logs promptly.
# BUGFIX: the variable python actually reads is PYTHONUNBUFFERED (any
# non-empty value enables it); the original "PYTHONBUFFERED=16" had no effect.
export PYTHONUNBUFFERED=1

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# Provides MODEL_ARGS (and presumably DISTRIBUTED_ARGS) used below — TODO confirm.
source "${SCRIPT_DIR}/../../../models/deepseek-r1-distill-qwen-1.5B.sh"

CKPT_ARGS=(
  --hf-checkpoint "${MODEL_DIR}/DeepSeek-R1-Distill-Qwen-1.5B"
  #--hf-checkpoint /root/Qwen3-4B-FP8
  --ref-load "${MODEL_DIR}/DeepSeek-R1-Distill-Qwen-1.5B_torch"
  --load "${MODEL_DIR}/DeepSeek-R1-Distill-Qwen-1.5B_slime/"
  --save "${MODEL_DIR}/DeepSeek-R1-Distill-Qwen-1.5B_slime/"
  --save-interval 20000
)

ROLLOUT_ARGS=(
  --prompt-data "${DATA_DIR}/dapo-math-17k/dapo-math-17k.jsonl"
  --input-key prompt
  --label-key label
  --apply-chat-template
  --rollout-shuffle

  --rm-type deepscaler

  --num-epoch 1
  --rollout-batch-size 32
  --n-samples-per-prompt 8
  --rollout-max-response-len 16384
  --rollout-temperature 0.8

  --global-batch-size 256
  --balance-data
  --partial-rollout
  --over-sampling-batch-size 64
  --dynamic-sampling-filter-path slime.rollout.filter_hub.dynamic_sampling_filters.check_reward_nonzero_std
)

EVAL_ARGS=(
  --eval-interval 10
  --eval-prompt-data aime "${DATA_DIR}/aime-2024/aime-2024.jsonl"
  --n-samples-per-eval-prompt 4
  --eval-max-response-len 16384
  --eval-top-p 0.7
)

PERF_ARGS=(
  --tensor-model-parallel-size 2
  --sequence-parallel
  --pipeline-model-parallel-size 1
  --context-parallel-size 1
  --expert-model-parallel-size 1
  --expert-tensor-parallel-size 1

  --recompute-granularity full
  --recompute-method uniform
  --recompute-num-layers 1

  # --micro-batch-size 1
  --use-dynamic-batch-size
  --max-tokens-per-gpu 9216
)

GRPO_ARGS=(
  --advantage-estimator grpo
  --use-kl-loss
  --kl-loss-coef 0.00
  --kl-loss-type low_var_kl
  --entropy-coef 0.00
  --eps-clip 0.2
  --eps-clip-high 0.28
)

OPTIMIZER_ARGS=(
  --optimizer adam
  --lr 1e-6
  --lr-decay-style constant
  --weight-decay 0.1
  --adam-beta1 0.9
  --adam-beta2 0.98
)

WANDB_ARGS=(
  --use-wandb
  --wandb-project 4B-amd
  --wandb-group dapo-deepseek-r1-distill-qwen-1.5B-dapo-partial
  # Read from the environment; never hardcode credentials in this script.
  --wandb-key "${WANDB_API_KEY}"
)

### AMD Support ###
# Need to fix some issue with torch_memory_saver in rocm to support larger --sglang-mem-fraction-static
# SGLANG_ARGS=(
#   --rollout-num-gpus-per-engine 2
#   --sglang-mem-fraction-static 0.7
# )
SGLANG_ARGS=(
  --rollout-num-gpus-per-engine 2
  --sglang-mem-fraction-static 0.4
)
####################


MISC_ARGS=(
  # default dropout in megatron is 0.1
  --attention-dropout 0.0
  --hidden-dropout 0.0
  # should be good for model performance
  --accumulate-allreduce-grads-in-fp32
  --attention-softmax-in-fp32
  # need to comment this when using model with MLA
  --attention-backend flash
  ### AMD Support ###
  # disable gradient accumulation fusion: Need to add apex to enable this
  --no-gradient-accumulation-fusion
  ###################
)

# launch the master node of ray in container
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Count the GPUs listed in HIP_VISIBLE_DEVICES (comma-separated ids).
NUM_GPUS=$(echo "${HIP_VISIBLE_DEVICES}" | tr ',' '\n' | wc -l)
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --disable-usage-stats


# "PYTHONPATH": "/workspace/Megatron-LM-amd_version/",
# NOTE(review): computed but never used below — kept for parity; verify before removing.
MEGATRON_LM_PATH=$(pip list | grep megatron-core | awk '{print $NF}')

ray job submit --address="http://127.0.0.1:8265" \
  --runtime-env-json='{
    "env_vars": {
      "PYTHONPATH": "/workspace/Megatron-LM-amd_version/",
      "CUDA_DEVICE_MAX_CONNECTIONS": "1"
    }
  }' \
  -- python3 train.py \
  --actor-num-nodes 1 \
  --actor-num-gpus-per-node 8 \
  --rollout-num-gpus-per-node 8 \
  --colocate \
  "${MODEL_ARGS[@]}" \
  "${CKPT_ARGS[@]}" \
  "${ROLLOUT_ARGS[@]}" \
  "${OPTIMIZER_ARGS[@]}" \
  "${GRPO_ARGS[@]}" \
  "${DISTRIBUTED_ARGS[@]}" \
  "${WANDB_ARGS[@]}" \
  "${PERF_ARGS[@]}" \
  "${EVAL_ARGS[@]}" \
  "${SGLANG_ARGS[@]}" \
  "${MISC_ARGS[@]}"


#### clear after training
# BUGFIX: disable -e for cleanup — pkill exits non-zero when nothing matches,
# which previously aborted the script mid-cleanup.
set +e
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

0 commit comments

Comments
 (0)