[docs] move reproducibility to main doc (#1433)

zhuzilin · web-flow · commit 977697023474 · 2026-01-16T19:57:02.000+08:00
diff --git a/docs/en/advanced/low-precision.md b/docs/en/advanced/low-precision.md
@@ -1,6 +1,6 @@
 # Low Precision Training
 
-- [FP8 rollout and FP8 training](#FP8-rollout-and-FP8-training)
+- [FP8 rollout and BF16 training](#FP8-rollout-and-BF16-training)
 - [FP8 rollout and FP8 training](#FP8-rollout-and-FP8-training)
 - [INT4 QAT Training](#INT4-QAT-Training)
 
diff --git a/docs/en/advanced/reproducibility.md b/docs/en/advanced/reproducibility.md
@@ -0,0 +1,52 @@
+# Reproducibility
+
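+Reproducibility is a bedrock of scientific progress. By combining the [deterministic inference](https://lmsys.org/blog/2025-09-22-sglang-deterministic/) provided by SGLang with the deterministic mode of Megatron-LM, slime can provide fully deterministic (bitwise) experiment reproduction.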
+
+To enable deterministic training, you need to uninstall flash attention 3 via `pip uninstall flash_attn_3 -y` and set:
+
+```bash
+  # sglang config
+  --sglang-enable-deterministic-inference
+  --sglang-attention-backend flashinfer
+
+  # megatron config
+  --deterministic-mode
+```
+
+And set the following environment variables:
+
+```bash
+     "env_vars": {
+        ...,
+        "NCCL_ALGO": "Ring",
+        "NVTE_ALLOW_NONDETERMINISTIC_ALGO": "0",
+        "CUBLAS_WORKSPACE_CONFIG": ":4096:8"
+     }
+```
+
+We provide a fully deterministic script that trains Qwen2.5 0.5B on GSM8K.
+
+You can initialize the training data and checkpoint with the following script:
+
+```bash
+# download
+hf download --repo-type dataset zhuzilin/gsm8k --local-dir /root/gsm8k
+hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir /root/Qwen2.5-0.5B-Instruct
+
+# convert ckpt
+cd slime/
+source scripts/models/qwen2.5-0.5B.sh
+PYTHONPATH=/root/Megatron-LM/ python \
+   tools/convert_hf_to_torch_dist.py \
+   ${MODEL_ARGS[@]} \
+   --hf-checkpoint /root/Qwen2.5-0.5B-Instruct \
+   --save /root/Qwen2.5-0.5B-Instruct_torch_dist/
+```
+
+And to run training:
+
+```bash
+bash scripts/run-qwen2.5-0.5B-reproducibility.sh
+```
+
+For screenshots of the wandb runs, please refer to [pull#370](https://github.com/THUDM/slime/pull/370).
diff --git a/docs/en/index.rst b/docs/en/index.rst
@@ -40,9 +40,9 @@ slime is the RL-framework behind GLM-4.7, GLM-4.6 and GLM-4.5. Apart from models
    :maxdepth: 1
    :caption: Advanced Features
 
-   _examples_synced/reproducibility/README.md
    advanced/speculative-decoding.md
    advanced/low-precision.md
+   advanced/reproducibility.md
    advanced/fault-tolerance.md
    advanced/pd-disaggregation.md
    advanced/arch-support-beyond-megatron.md
diff --git a/docs/zh/advanced/reproducibility.md b/docs/zh/advanced/reproducibility.md
@@ -45,7 +45,7 @@ PYTHONPATH=/root/Megatron-LM/ python \
 And to run training,
 
 ```bash
-bash examples/reproducibility/run-qwen2.5-0.5B-gsm8k.sh
+bash scripts/run-qwen2.5-0.5B-reproducibility.sh
 ```
 
 For screen shots of the wandb, please refer to [pull#370](https://github.com/THUDM/slime/pull/370).
diff --git a/docs/zh/index.rst b/docs/zh/index.rst
@@ -40,9 +40,9 @@ slime 是 GLM-4.7、GLM-4.6、GLM-4.5 背后的 RL 训练框架。除此之外
    :maxdepth: 1
    :caption: 高级特性
 
-   _examples_synced/reproducibility/README.md
    advanced/speculative-decoding.md
    advanced/low-precision.md
+   advanced/reproducibility.md
    advanced/fault-tolerance.md
    advanced/pd-disaggregation.md
    advanced/arch-support-beyond-megatron.md
diff --git a/scripts/run-qwen2.5-0.5B-reproducibility.sh b/scripts/run-qwen2.5-0.5B-reproducibility.sh
@@ -16,7 +16,7 @@ set -ex
 export PYTHONBUFFERED=16
 
 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
-source "${SCRIPT_DIR}/../../scripts/models/qwen2.5-0.5B.sh"
+source "${SCRIPT_DIR}/models/qwen2.5-0.5B.sh"
 
 CKPT_ARGS=(
    --hf-checkpoint /root/Qwen2.5-0.5B-Instruct/