
Commit a14b353

[release] bump to v0.1.0
1 parent 0b96b53 commit a14b353

File tree

6 files changed (+91 −99 lines)


.github/workflows/conda-ci.yml

Lines changed: 81 additions & 0 deletions
@@ -0,0 +1,81 @@
+name: conda CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  run-when-labeled:
+    # Run only when this trigger added the `release` label to the PR, or when the PR already carries that label
+    if: |
+      (github.event.action == 'labeled' && github.event.label.name == 'release')
+      || contains(join(github.event.pull_request.labels.*.name, ','), 'release')
+    container:
+      image: lmsysorg/sglang:v0.5.0rc0-cu126
+      options: --gpus all --ipc=host --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 --memory=0 --memory-swap=0 -v /data/models:/root/models -v /data/datasets:/root/datasets
+
+    defaults:
+      run:
+        working-directory: ${{ github.workspace }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Construct Conda
+        run: |
+          echo "📦 Installing slime..."
+          cd $GITHUB_WORKSPACE
+          echo "Current directory: $(pwd)"
+
+          bash build_conda.sh
+        shell: bash
+
+      - name: Download model and dataset
+        run: |
+          echo "🔗 Downloading model and dataset..."
+
+          # Create cache directories if they don't exist
+          mkdir -p /root/models /root/datasets
+
+          echo "Downloading Qwen3-30B-A3B..."
+          hf download Qwen/Qwen3-30B-A3B --local-dir /root/models/Qwen3-30B-A3B
+          hf download Qwen/Qwen3-30B-A3B-FP8 --local-dir /root/models/Qwen3-30B-A3B-FP8
+
+          hf download --repo-type dataset zhuzilin/dapo-math-17k --local-dir /root/datasets/dapo-math-17k
+
+          hf download --repo-type dataset zhuzilin/aime-2024 --local-dir /root/datasets/aime-2024
+        shell: bash
+
+      - name: Convert checkpoint
+        run: |
+          echo "🔄 Converting model checkpoint..."
+          cd $GITHUB_WORKSPACE
+          echo "Current directory: $(pwd)"
+
+          source scripts/models/qwen3-30B-A3B.sh
+          PYTHONPATH=/root/Megatron-LM torchrun --nproc-per-node 8 tools/convert_hf_to_torch_dist.py \
+            ${MODEL_ARGS[@]} \
+            --hf-checkpoint /root/models/Qwen3-30B-A3B \
+            --save /root/Qwen3-30B-A3B_torch_dist
+        shell: bash
+
+      - name: Run tests
+        run: |
+          echo "🧪 Running tests..."
+          cd $GITHUB_WORKSPACE
+          echo "Current directory: $(pwd)"
+
+          bash tests/test_qwen3-30B-A3B.sh
+        shell: bash
+
+      - name: Cleanup
+        if: always()
+        run: |
+          echo "🧹 Cleaning up..."
+          pkill -9 ray || true
+          ray stop --force || true
+          pkill -9 python || true
+        shell: bash
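For reference, the job's `if:` label gate can be mirrored as a plain shell check when debugging why a run was skipped. This is only a sketch under assumptions: `should_run` and its arguments are illustrative stand-ins for the `github.event` fields; in CI, GitHub Actions evaluates the real expression itself.

```shell
# Illustrative local mirror of the workflow's label gate. The three
# arguments stand in for github.event.action, github.event.label.name,
# and the comma-joined PR label names.
should_run() {
  action="$1"; new_label="$2"; pr_labels="$3"
  if [ "$action" = "labeled" ] && [ "$new_label" = "release" ]; then
    echo "run"
  elif printf ',%s,' "$pr_labels" | grep -q ',release,'; then
    echo "run"
  else
    echo "skip"
  fi
}

should_run "labeled" "release" "ci"       # prints "run"
should_run "synchronize" "" "ci,release"  # prints "run"
should_run "synchronize" "" "ci"          # prints "skip"
```

The wrapping commas around the joined label list keep a label like `pre-release` from matching `release` by substring.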

docs/zh/build.md renamed to build_conda.sh

Lines changed: 8 additions & 19 deletions
@@ -1,23 +1,11 @@
-# Build the environment from scratch
-
-[English](../en/build.md)
-
-When it is inconvenient to use our pre-built image directly, we provide the following way to set up the environment:
-
-## Setting up the environment with anaconda / mamba
-
-Here, taking micromamba as an example, we build a conda environment named slime inside the official sglang image `lmsysorg/sglang:latest`:
-
-
-```bash
 ####################
 # create conda
 ####################
 yes '' | "${SHELL}" <(curl -L micro.mamba.pm/install.sh)
 source ~/.bashrc
 micromamba self-update
 
-micromamba create -n slime python=3.10 pip -c conda-forge -y
+micromamba create -n slime python=3.12 pip -c conda-forge -y
 # install cuda-12.6.0 as this is the default cuda version for pytorch
 # and apex need this alignment.
 micromamba install -n slime cuda cuda-nvtx cuda-nvtx-dev -c nvidia/label/cuda-12.6.0 -y
@@ -29,7 +17,7 @@ micromamba run -n slime pip install cmake ninja
 ####################
 export BASE_DIR=/root/
 cd $BASE_DIR
-git clone https://github.com/sgl-project/sglang.git --branch v0.4.9.post2 --depth 1
+git clone https://github.com/sgl-project/sglang.git --branch v0.5.0rc0 --depth 1
 cd $BASE_DIR/sglang/
 micromamba run -n slime pip -v install -e "python[all]"
 # TODO: change to pip install sglang-router after it has a new release
@@ -52,17 +40,17 @@ TORCH_CUDA_ARCH_LIST="9.0;9.0a" micromamba run -n slime \
   pip -v install transformer_engine[pytorch]
 # flash attn
 # the newest version megatron supports is v2.7.4.post1
-micromamba run -n slime pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
+micromamba run -n slime MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1
 # megatron
-cd /root/
+cd $BASE_DIR
 git clone https://github.com/NVIDIA/Megatron-LM.git
 cd Megatron-LM/
+git checkout 48406695c4efcf1026a7ed70bb390793918dd97b
 micromamba run -n slime pip install -e .
 
 ####################
 # other deps
 ####################
-micromamba run -n slime pip install git+https://github.com/zhuzilin/cumem_allocator.git --no-build-isolation
 micromamba run -n slime pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
 
 ####################
@@ -74,5 +62,6 @@ cd slime/
 micromamba run -n slime pip install -e .
 # apply patch
 cd $BASE_DIR/sglang
-git apply /root/slime/docker/patch/sglang.patch
-```
+git apply $BASE_DIR/slime/docker/patch/sglang.patch
+cd $BASE_DIR/Megatron-LM
+git apply $BASE_DIR/slime/docker/patch/megatron.patch
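The script now ends by applying two patches with bare `git apply`, which fails if the script is re-run in the same checkout. One possible guard is to check whether the patch still applies cleanly first; `apply_once` below is an illustrative helper and not part of build_conda.sh.

```shell
# Apply a patch only if it still applies cleanly, so a re-run becomes a
# no-op instead of an error. Illustrative sketch, not in the script itself.
apply_once() {
  repo="$1"; patch="$2"
  if git -C "$repo" apply --check "$patch" 2>/dev/null; then
    git -C "$repo" apply "$patch"
    echo "applied $(basename "$patch")"
  else
    echo "skipped $(basename "$patch")"
  fi
}
```

Note that `git apply --check` also reports "skipped" for a genuinely conflicting patch, so the echoed message is only a hint, not proof the patch is in place.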

docker/Dockerfile

Lines changed: 0 additions & 1 deletion
@@ -13,7 +13,6 @@ RUN pip install sglang-router --force-reinstall
 RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-cache-dir --force-reinstall
 RUN pip install ray[default]
 RUN pip install httpx[http2] wandb pylatexenc blobfile accelerate "mcp[cli]"
-RUN pip install git+https://github.com/zhuzilin/cumem_allocator.git
 
 # mbridge
 RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
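Since the Dockerfile no longer installs `cumem_allocator`, a quick sanity check inside a rebuilt image could confirm the dropped dependency is really gone. The `check_import` helper below is illustrative and not part of the commit.

```shell
# Report whether a Python module is importable; useful for verifying that
# a dependency removed from the image is actually absent. Illustrative only.
check_import() {
  if python3 -c "import $1" 2>/dev/null; then
    echo "$1: present"
  else
    echo "$1: absent"
  fi
}

check_import cumem_allocator
```

The same one-liner works for spot-checking any of the remaining dependencies, e.g. `check_import ray`.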

docs/en/build.md

Lines changed: 0 additions & 77 deletions
This file was deleted.

docs/en/quick_start.md

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ This document will guide you through setting up the environment and getting started
 
 Since slime may contain temporary patches for sglang/megatron, to avoid potential environment configuration issues, we strongly recommend **users to use our latest Docker image**, which comes pre-configured with all dependencies.
 
-- For scenarios where Docker is not convenient, please refer to [Building Environment from Scratch](./build.md);
+- For scenarios where Docker is not convenient, please refer to [build_conda.sh](./../../build_conda.sh);
 - For AMD support, please refer to [AMD Usage Tutorial](./amd_tutorial.md).
 
 ### Pull and Start Docker Container

docs/zh/quick_start.md

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
 
 Since slime may contain temporary patches for sglang/megatron, to avoid potential environment configuration issues, we strongly recommend **users to use our latest Docker image**, which comes pre-configured with all dependencies.
 
-- For scenarios where Docker is not convenient, please refer to [Building Environment from Scratch](./build.md)
+- For scenarios where Docker is not convenient, please refer to [build_conda.sh](./../../build_conda.sh)
 - For AMD support, please refer to [AMD Usage Tutorial](../en/amd_tutorial.md)
 
 ### Pull and Start Docker Container
