Skip to content

Commit d0b4de5

Browse files
committed
deploy: 88cac0a
1 parent c89dac3 commit d0b4de5

File tree

71 files changed

+345
-195
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

71 files changed

+345
-195
lines changed

.doctrees/environment.pickle

139 Bytes
Binary file not shown.
-1.28 KB
Binary file not shown.
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
#!/bin/bash

# Tear down anything left over from a previous run so the task can be rerun.
# This section runs before strict mode is enabled below, so a kill/stop command
# that finds nothing to act on (and returns non-zero) does not abort the script.
# NOTE(review): `pkill -9 python` targets every process whose name matches
# "python", not only those started by this script - confirm on shared hosts.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
# Second pass after the wait (presumably to catch processes that respawned).
pkill -9 ray
pkill -9 python


# Strict mode from here on: exit on error (-e), error on unset variables (-u),
# trace every command (-x), and fail a pipeline if any stage fails (pipefail).
set -euxo pipefail


### AMD Support ###
# Locations of the slime checkout, the model checkpoints and the datasets.
# Each keeps a non-empty value already present in the environment and
# otherwise falls back to the default path shown here.
SLIME_DIR="${SLIME_DIR:-/home/yushensu/projects/slime}"
export SLIME_DIR

MODEL_DIR="${MODEL_DIR:-/home/yushensu/projects/model}"
export MODEL_DIR

DATA_DIR="${DATA_DIR:-/home/yushensu/projects/data}"
export DATA_DIR

# For AMD GPU
export RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES="${RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES:-1}" # Must be set to 1
export HIP_VISIBLE_DEVICES="${HIP_VISIBLE_DEVICES:-0,1,2,3,4,5,6,7}" # Comma-separated list of the GPUs to use
####################


# Keep Python (and therefore the Ray workers) from buffering stdout/stderr.
# PYTHONBUFFERED is not a variable the Python interpreter reads; the documented
# switch is PYTHONUNBUFFERED. The original export is kept for backward
# compatibility, and the effective variable is set next to it.
export PYTHONBUFFERED=16
export PYTHONUNBUFFERED=1

# Resolve the directory containing this script, then load the model definition
# that sits next to it (presumably the file that defines MODEL_ARGS, which is
# used at launch time but not assigned anywhere in this script).
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "${SCRIPT_DIR}/models/qwen3-4B.sh"

# Checkpoint locations and save cadence. Paths are quoted so a MODEL_DIR
# containing whitespace or glob characters stays a single argument.
CKPT_ARGS=(
  --hf-checkpoint "${MODEL_DIR}/Qwen3-4B"
  --ref-load "${MODEL_DIR}/Qwen3-4B_torch_dist"
  --load "${MODEL_DIR}/Qwen3-4B_slime/"
  --save "${MODEL_DIR}/Qwen3-4B_slime/"
  --save-interval 20
)

# Rollout (generation) settings: prompt dataset, sampling and batch sizes.
# The dataset path is quoted so a DATA_DIR with whitespace stays one argument.
ROLLOUT_ARGS=(
  --prompt-data "${DATA_DIR}/dapo-math-17k/dapo-math-17k.jsonl"
  --input-key prompt
  --label-key label
  --apply-chat-template
  --rollout-shuffle
  --rm-type deepscaler
  --num-rollout 3000
  --rollout-batch-size 32
  --n-samples-per-prompt 8
  --rollout-max-response-len 8192
  --rollout-temperature 0.8

  --global-batch-size 256
  --balance-data
)

# Evaluation settings. --eval-prompt-data takes two values here: the label
# "aime" followed by the dataset path (quoted so it stays one argument).
EVAL_ARGS=(
  --eval-interval 20
  --eval-prompt-data aime "${DATA_DIR}/aime-2024/aime-2024.jsonl"
  --n-samples-per-eval-prompt 16
  --eval-max-response-len 16384
  --eval-top-p 0.7
)

# Parallelism, activation-recomputation and batching settings.
PERF_ARGS=(
  --tensor-model-parallel-size 2
  --sequence-parallel
  --pipeline-model-parallel-size 1
  --context-parallel-size 1
  --expert-model-parallel-size 1
  --expert-tensor-parallel-size 1

  --recompute-granularity full
  --recompute-method uniform
  --recompute-num-layers 1

  # --micro-batch-size 1
  --use-dynamic-batch-size
  --max-tokens-per-gpu 9216
)

# GRPO algorithm settings: advantage estimator, KL/entropy coefficients and
# the lower/upper clipping bounds.
GRPO_ARGS=(
  --advantage-estimator grpo
  --use-kl-loss
  --kl-loss-coef 0.00
  --kl-loss-type low_var_kl
  --entropy-coef 0.00
  --eps-clip 0.2
  --eps-clip-high 0.28
)

# Optimizer settings: Adam with a constant learning rate.
OPTIMIZER_ARGS=(
  --optimizer adam
  --lr 1e-6
  --lr-decay-style constant
  --weight-decay 0.1
  --adam-beta1 0.9
  --adam-beta2 0.98
)

# Weights & Biases logging is disabled by default, so this array is empty.
# Uncomment the entries below to enable it.
# NOTE(review): this script runs with `set -x`, so passing --wandb-key on the
# command line would print the key in the trace output - confirm before enabling.
WANDB_ARGS=(
  # --use-wandb
  # --wandb-project slime-dev
  # --wandb-group qwen3-4B-test
  # --wandb-key ${WANDB_KEY}
)

# SGLang rollout-engine settings: GPUs per engine and static memory fraction.
SGLANG_ARGS=(
  --rollout-num-gpus-per-engine 2
  --sglang-mem-fraction-static 0.7
)

# Miscellaneous training flags.
MISC_ARGS=(
  # default dropout in megatron is 0.1
  --attention-dropout 0.0
  --hidden-dropout 0.0
  # should be good for model performance
  --accumulate-allreduce-grads-in-fp32
  --attention-softmax-in-fp32
  # comment this out when using a model with MLA
  --attention-backend flash
  ### AMD Support ###
  # disable gradient accumulation fusion: apex must be added to enable it
  --no-gradient-accumulation-fusion
  ###################
)

# Launch the Ray head node inside the container.
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# One GPU per comma-separated id in HIP_VISIBLE_DEVICES.
IFS=',' read -r -a gpu_ids <<<"${HIP_VISIBLE_DEVICES}"
NUM_GPUS=${#gpu_ids[@]}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265


# Last column of the megatron-core row in `pip list`.
# NOTE(review): this value is not used below - PYTHONPATH in the runtime env is
# hard-coded to /workspace/Megatron-LM/. The lookup is made non-fatal so that a
# missing megatron-core entry (grep exits 1) does not abort the whole script
# under `set -e` + `pipefail` before the job is submitted.
MEGATRON_LM_PATH=$(pip list | grep megatron-core | awk '{print $NF}') || true

# Submit the training job to the local Ray dashboard.
# - train.py is resolved relative to the current working directory.
# - --actor-num-gpus-per-node follows NUM_GPUS so it always matches what the
#   head node was started with (8 with the default HIP_VISIBLE_DEVICES).
# - Argument arrays are quoted so each element stays exactly one argument;
#   WANDB_ARGS uses the ${arr[@]+...} form because it may be empty, which
#   trips `set -u` on bash releases older than 4.4.
ray job submit --address="http://127.0.0.1:8265" \
  --runtime-env-json='{
    "env_vars": {
      "PYTHONPATH": "/workspace/Megatron-LM/",
      "CUDA_DEVICE_MAX_CONNECTIONS": "1"
    }
  }' \
  -- python3 train.py \
  --actor-num-nodes 1 \
  --actor-num-gpus-per-node "${NUM_GPUS}" \
  --colocate \
  "${MODEL_ARGS[@]}" \
  "${CKPT_ARGS[@]}" \
  "${ROLLOUT_ARGS[@]}" \
  "${OPTIMIZER_ARGS[@]}" \
  "${GRPO_ARGS[@]}" \
  ${WANDB_ARGS[@]+"${WANDB_ARGS[@]}"} \
  "${PERF_ARGS[@]}" \
  "${EVAL_ARGS[@]}" \
  "${SGLANG_ARGS[@]}" \
  "${MISC_ARGS[@]}"

_examples_synced/eval/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
<link rel="search" title="Search" href="../../search.html" />
5353
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5454
<meta name="docsearch:language" content="en"/>
55-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
55+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5656
</head>
5757

5858

@@ -599,7 +599,7 @@ <h2>4) Inside the Skills container<a class="headerlink" href="#inside-the-skills
599599

600600
<div class="footer-item">
601601
<p class="last-updated">
602-
Last updated on Dec 10, 2025.
602+
Last updated on Dec 11, 2025.
603603
<br/>
604604
</p>
605605
</div>

_examples_synced/eval_multi_task/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
<link rel="search" title="Search" href="../../search.html" />
5353
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5454
<meta name="docsearch:language" content="en"/>
55-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
55+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5656
</head>
5757

5858

@@ -534,7 +534,7 @@ <h2>IFBench Notes<a class="headerlink" href="#ifbench-notes" title="Link to this
534534

535535
<div class="footer-item">
536536
<p class="last-updated">
537-
Last updated on Dec 10, 2025.
537+
Last updated on Dec 11, 2025.
538538
<br/>
539539
</p>
540540
</div>

_examples_synced/fully_async/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
<link rel="prev" title="Search-R1 lite" href="../search-r1/README.html" />
5555
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5656
<meta name="docsearch:language" content="en"/>
57-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
57+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5858
</head>
5959

6060

@@ -595,7 +595,7 @@ <h2>Config Differences (2 Key Points)<a class="headerlink" href="#config-differe
595595

596596
<div class="footer-item">
597597
<p class="last-updated">
598-
Last updated on Dec 10, 2025.
598+
Last updated on Dec 11, 2025.
599599
<br/>
600600
</p>
601601
</div>

_examples_synced/geo3k_vlm/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
<link rel="search" title="Search" href="../../search.html" />
5353
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5454
<meta name="docsearch:language" content="en"/>
55-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
55+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5656
</head>
5757

5858

@@ -552,7 +552,7 @@ <h3>Numerical Precision with Non-Binary Rewards<a class="headerlink" href="#nume
552552

553553
<div class="footer-item">
554554
<p class="last-updated">
555-
Last updated on Dec 10, 2025.
555+
Last updated on Dec 11, 2025.
556556
<br/>
557557
</p>
558558
</div>

_examples_synced/low_precision/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
<link rel="search" title="Search" href="../../search.html" />
5353
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5454
<meta name="docsearch:language" content="en"/>
55-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
55+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5656
</head>
5757

5858

@@ -579,7 +579,7 @@ <h2>TODO<a class="headerlink" href="#todo" title="Link to this heading">#</a></h
579579

580580
<div class="footer-item">
581581
<p class="last-updated">
582-
Last updated on Dec 10, 2025.
582+
Last updated on Dec 11, 2025.
583583
<br/>
584584
</p>
585585
</div>

_examples_synced/multi_agent/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
<link rel="prev" title="Retool: from SFT to RL" href="../retool/README.html" />
5555
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5656
<meta name="docsearch:language" content="en"/>
57-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
57+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5858
</head>
5959

6060

@@ -583,7 +583,7 @@ <h2>New Arguments<a class="headerlink" href="#new-arguments" title="Link to this
583583

584584
<div class="footer-item">
585585
<p class="last-updated">
586-
Last updated on Dec 10, 2025.
586+
Last updated on Dec 11, 2025.
587587
<br/>
588588
</p>
589589
</div>

_examples_synced/reproducibility/README.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
<link rel="prev" title="DeepSeek R1 with 128xH100" href="../../examples/deepseek-r1.html" />
5555
<meta name="viewport" content="width=device-width, initial-scale=1"/>
5656
<meta name="docsearch:language" content="en"/>
57-
<meta name="docbuild:last-update" content="Dec 10, 2025"/>
57+
<meta name="docbuild:last-update" content="Dec 11, 2025"/>
5858
</head>
5959

6060

@@ -545,7 +545,7 @@ <h1>Reproducibility<a class="headerlink" href="#reproducibility" title="Link to
545545

546546
<div class="footer-item">
547547
<p class="last-updated">
548-
Last updated on Dec 10, 2025.
548+
Last updated on Dec 11, 2025.
549549
<br/>
550550
</p>
551551
</div>

0 commit comments

Comments
 (0)