Skip to content

Commit d1bf1f0

Browse files
committed
[release] bump to v0.1.0
1 parent 0b96b53 commit d1bf1f0

File tree

9 files changed

+163
-168
lines changed

9 files changed

+163
-168
lines changed

.github/workflows/conda-ci.yml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
name: conda CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build-conda:
    # NOTE(review): on `push` events the `github.event.pull_request` context is
    # empty, so the original bare `contains(...)` was always false and the job
    # was silently skipped for every push. The explicit event_name guard below
    # keeps the exact same runtime behavior (PR-title gated) while making that
    # intent visible; if release pushes should also build, extend this condition.
    if: github.event_name == 'pull_request' && contains(github.event.pull_request.title, '[release]')
    runs-on: self-hosted
    container:
      image: lmsysorg/sglang:v0.5.0rc0-cu126
      options: --gpus all --ipc=host --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 --memory=0 --memory-swap=0 -v /data/models:/root/models -v /data/datasets:/root/datasets

    defaults:
      run:
        working-directory: ${{ github.workspace }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Construct Conda
        run: |
          echo "📦 Installing slime..."
          cd "$GITHUB_WORKSPACE"
          echo "Current directory: $(pwd)"

          BASE_DIR=$(pwd) bash build_conda.sh
        shell: bash

      - name: Download model and dataset
        run: |
          echo "🔗 Downloading model and dataset..."

          # Create cache directories if they don't exist (mounted from the host).
          mkdir -p /root/models /root/datasets

          echo "Downloading Qwen3-30B-A3B..."
          hf download Qwen/Qwen3-30B-A3B --local-dir /root/models/Qwen3-30B-A3B
          hf download Qwen/Qwen3-30B-A3B-FP8 --local-dir /root/models/Qwen3-30B-A3B-FP8

          hf download --repo-type dataset zhuzilin/dapo-math-17k --local-dir /root/datasets/dapo-math-17k

          hf download --repo-type dataset zhuzilin/aime-2024 --local-dir /root/datasets/aime-2024
        shell: bash

      - name: Convert checkpoint
        run: |
          echo "🔄 Converting model checkpoint..."
          cd "$GITHUB_WORKSPACE"
          echo "Current directory: $(pwd)"

          # MODEL_ARGS is a bash array defined by the sourced model script.
          # Quote the expansion so multi-word arguments survive word splitting.
          source scripts/models/qwen3-30B-A3B.sh
          PYTHONPATH=/root/Megatron-LM torchrun --nproc-per-node 8 tools/convert_hf_to_torch_dist.py \
            "${MODEL_ARGS[@]}" \
            --hf-checkpoint /root/models/Qwen3-30B-A3B \
            --save /root/Qwen3-30B-A3B_torch_dist
        shell: bash

      - name: Run tests
        run: |
          echo "🧪 Running tests..."
          cd "$GITHUB_WORKSPACE"
          echo "Current directory: $(pwd)"

          bash tests/test_qwen3-30B-A3B.sh
        shell: bash

      - name: Cleanup
        # Run even when an earlier step failed so stray ray/python processes
        # don't poison the next job on this self-hosted runner.
        if: always()
        run: |
          echo "🧹 Cleaning up..."
          pkill -9 ray || true
          ray stop --force || true
          pkill -9 python || true
        shell: bash

.github/workflows/slime-ci.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ name: GPU CI
33
on:
44
push:
55
branches: [main]
6+
paths: "slime/**"
67
pull_request:
78
branches: [main]
9+
paths: "slime/**"
810

911
jobs:
1012
# GPU tests on self-hosted runner
@@ -13,11 +15,11 @@ jobs:
1315
container:
1416
image: zhuzilin/slime:latest
1517
options: --gpus all --ipc=host --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 --memory=0 --memory-swap=0 -v /data/models:/root/models -v /data/datasets:/root/datasets
16-
18+
1719
defaults:
1820
run:
1921
working-directory: ${{ github.workspace }}
20-
22+
2123
steps:
2224
- name: Checkout repository
2325
uses: actions/checkout@v4
@@ -85,11 +87,11 @@ jobs:
8587
container:
8688
image: zhuzilin/slime:latest
8789
options: --gpus all --ipc=host --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 --memory=0 --memory-swap=0 -v /data/models:/root/models -v /data/datasets:/root/datasets
88-
90+
8991
defaults:
9092
run:
9193
working-directory: ${{ github.workspace }}
92-
94+
9395
steps:
9496
- name: Checkout repository
9597
uses: actions/checkout@v4

README.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,6 @@ For complete usage instructions, please refer to the [Usage Documentation](docs/
6161

6262
- For debugging tips, please refer to the [Debugging Guide](docs/en/debug.md)
6363

64-
## Hardware Support
65-
- Nvidia: refer to this repo README
66-
- AMD: refer to the [tutorial](docs/en/amd_tutorial.md)
67-
6864
## FAQ & Acknowledgements
6965

7066
- For frequently asked questions, please see the [Q\&A](docs/en/qa.md)

build_conda.sh

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/bin/bash
2+
3+
set -ex
4+
5+
# create conda
6+
yes '' | "${SHELL}" <(curl -L micro.mamba.pm/install.sh)
7+
source ~/.bashrc
8+
9+
# >>> mamba initialize >>>
10+
# !! Contents within this block are managed by 'micromamba shell init' !!
11+
export MAMBA_EXE='/root/.local/bin/micromamba';
12+
export MAMBA_ROOT_PREFIX='/root/micromamba';
13+
__mamba_setup="$("$MAMBA_EXE" shell hook --shell bash --root-prefix "$MAMBA_ROOT_PREFIX" 2> /dev/null)"
14+
eval "$__mamba_setup"
15+
alias micromamba="$MAMBA_EXE" # Fallback on help from micromamba activate
16+
unset __mamba_setup
17+
# <<< mamba initialize <<<
18+
19+
micromamba self-update
20+
21+
micromamba create -n slime python=3.12 pip -c conda-forge -y
22+
micromamba activate slime
23+
export CUDA_HOME="$CONDA_PREFIX"
24+
25+
export BASE_DIR=${BASE_DIR:-"/root"}
26+
cd $BASE_DIR
27+
# install sglang
28+
git clone -b v0.5.0rc0 https://github.com/sgl-project/sglang.git
29+
cd sglang
30+
# Install the python packages
31+
pip install -e "python[all]"
32+
33+
# install cuda 12.8 as it's the default cuda version for torch
34+
micromamba install -n slime cuda cuda-nvtx cuda-nvtx-dev -c nvidia/label/cuda-12.8.0 -y
35+
micromamba install -n slime -c conda-forge cudnn -y
36+
pip install cmake ninja
37+
38+
# reinstall sglang deps
39+
pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-cache-dir --force-reinstall --no-build-isolation
40+
41+
# install megatron deps
42+
TORCH_CUDA_ARCH_LIST="9.0;9.0a" \
43+
pip -v install --no-build-isolation \
44+
git+https://github.com/fanshiqing/grouped_gemm@v1.1.4
45+
# apex
46+
TORCH_CUDA_ARCH_LIST="9.0;9.0a" NVCC_APPEND_FLAGS="--threads 4" \
47+
\
48+
pip -v install --disable-pip-version-check --no-cache-dir \
49+
--no-build-isolation \
50+
--config-settings "--build-option=--cpp_ext --cuda_ext --parallel 8" git+https://github.com/NVIDIA/apex.git
51+
# transformer engine
52+
TORCH_CUDA_ARCH_LIST="9.0;9.0a" \
53+
pip -v install transformer_engine[pytorch]
54+
# flash attn
55+
# the newest version megatron supports is v2.7.4.post1
56+
MAX_JOBS=64 pip -v install flash-attn==2.7.4.post1
57+
# megatron
58+
cd $BASE_DIR
59+
git clone https://github.com/NVIDIA/Megatron-LM.git
60+
cd Megatron-LM/
61+
git checkout 48406695c4efcf1026a7ed70bb390793918dd97b
62+
pip install -e .
63+
64+
# mbridge
65+
pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps
66+
67+
# install slime and apply patches
68+
cd $BASE_DIR
69+
git clone https://github.com/THUDM/slime.git
70+
cd slime/
71+
pip install -e .
72+
# apply patch
73+
cd $BASE_DIR/sglang
74+
git apply $BASE_DIR/slime/docker/patch/v0.5.0rc0-cuda126/sglang.patch
75+
cd $BASE_DIR/Megatron-LM
76+
git apply $BASE_DIR/slime/docker/patch/v0.5.0rc0-cuda126/megatron.patch

docker/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ RUN pip install sglang-router --force-reinstall
1313
RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git --no-cache-dir --force-reinstall
1414
RUN pip install ray[default]
1515
RUN pip install httpx[http2] wandb pylatexenc blobfile accelerate "mcp[cli]"
16-
RUN pip install git+https://github.com/zhuzilin/cumem_allocator.git
1716

1817
# mbridge
1918
RUN pip install git+https://github.com/ISEEKYAN/mbridge.git --no-deps

docs/en/build.md

Lines changed: 0 additions & 77 deletions
This file was deleted.

docs/en/quick_start.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ This document will guide you through setting up the environment and getting star
88

99
Since slime may contain temporary patches for sglang/megatron, to avoid potential environment configuration issues, we strongly recommend **users to use our latest Docker image**, which comes pre-configured with all dependencies.
1010

11-
- For scenarios where Docker is not convenient, please refer to [Building Environment from Scratch](./build.md);
11+
- For scenarios where Docker is not convenient, please refer to [build_conda.sh](./../../build_conda.sh);
1212
- For AMD support, please refer to [AMD Usage Tutorial](./amd_tutorial.md).
1313

1414
### Pull and Start Docker Container
@@ -554,5 +554,4 @@ ray job submit --address="http://127.0.0.1:8265" \
554554
slime has been deeply optimized for distributed training of large-scale Mixture of Experts (MoE) models. We provide some end-to-end training cases for reference:
555555

556556
- [Example: 64xH100 Training GLM-4.5](models/glm4.5-355B-A32B.md)
557-
- [Example: 8xH100 Training Qwen3-30B-A3B](models/qwen3-30B-A3B.md)
558557
- [Example: 128xH100 Training DeepSeek-R1](models/deepseek-r1.md)

docs/zh/build.md

Lines changed: 0 additions & 78 deletions
This file was deleted.

0 commit comments

Comments
 (0)