Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions 3rdparty/amd/wheel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,29 @@ pip install "amd-sglang[all-hip,rocm720]" -i https://pypi.amd.com/rocm-7.2.0/sim

Note: You must resolve the two dependencies, AITER and triton, below. Others are optional depending on your applications.

## JIT Kernel Support

The amd-sglang wheel includes JIT (Just-In-Time) kernel compilation support: optimized CUDA/HIP kernels are compiled on demand at runtime rather than shipped as prebuilt binaries.

### Requirements

JIT kernel compilation requires:
1. **apache-tvm-ffi** - Included in the `runtime_common` dependencies (installed with `amd-sglang[all-hip,...]`)
2. **System compiler toolchain** - A C++ compiler compatible with your ROCm installation
   - For ROCm environments, this is typically provided by the ROCm installation
   - Ensure `hipcc` is available in your `PATH`

The JIT kernel source files (`.cuh` and `.cu`) are bundled with the wheel and are available at runtime for compilation.

### Verification

To verify that JIT kernel support is installed, check that the bundled kernel path resolves:
```python
from sglang.jit_kernel.utils import KERNEL_PATH
print(f"JIT kernel path: {KERNEL_PATH}")
# Should print the path to site-packages/sglang/jit_kernel
```

## Manual Dependency Resolution

### Resolving AITER
Expand Down
97 changes: 38 additions & 59 deletions 3rdparty/amd/wheel/sglang/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,16 @@ dependencies = ["aiohttp", "requests", "tqdm", "numpy", "IPython", "setproctitle
runtime_common = [
"IPython",
"aiohttp",
"apache-tvm-ffi>=0.1.5,<0.2",
"anthropic>=0.20.0",
"blobfile==3.0.0",
"av",
"build",
"compressed-tensors",
"decord2",
"datasets",
"einops",
"fastapi",
"gguf",
"hf_transfer",
"huggingface_hub",
"interegular",
"llguidance>=0.7.11,<0.8.0",
"modelscope",
Expand Down Expand Up @@ -59,7 +58,8 @@ runtime_common = [
"timm==1.0.16",
"torchao==0.9.0",
"tqdm",
"transformers==4.57.1",
"mistral_common>=1.9.0",
"transformers==5.3.0",
"uvicorn",
"uvloop",
"xgrammar==0.1.32",
Expand All @@ -76,7 +76,7 @@ rocm700 = [
"torchaudio @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torchaudio-2.9.0%2Brocm7.0.2.gite3c6ee2b-cp310-cp310-linux_x86_64.whl",
"torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torchvision-0.24.0%2Brocm7.0.2.gitb919bd0c-cp310-cp310-linux_x86_64.whl",
"mooncake-transfer-engine-non-cuda==0.3.8.post1",
"sglang-kernel @ https://github.com/sgl-project/whl/releases/download/v0.4.0/sglang_kernel-0.4.0+rocm700-cp310-abi3-manylinux2014_x86_64.whl",
"sglang-kernel @ https://github.com/sgl-project/whl/releases/download/v0.4.1/sglang_kernel-0.4.1+rocm700-cp310-abi3-manylinux2014_x86_64.whl",
]

rocm720 = [
Expand All @@ -85,50 +85,10 @@ rocm720 = [
"torchaudio @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchaudio-2.9.0%2Brocm7.2.0.gite3c6ee2b-cp310-cp310-linux_x86_64.whl",
"torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.24.0%2Brocm7.2.0.gitb919bd0c-cp310-cp310-linux_x86_64.whl",
"mooncake-transfer-engine-non-cuda==0.3.8.post1",
"sglang-kernel @ https://github.com/sgl-project/whl/releases/download/v0.4.0/sglang_kernel-0.4.0+rocm720-cp310-abi3-manylinux2014_x86_64.whl",
"sglang-kernel @ https://github.com/sgl-project/whl/releases/download/v0.4.1/sglang_kernel-0.4.1+rocm720-cp310-abi3-manylinux2014_x86_64.whl",
]

# HIP (Heterogeneous-computing Interface for Portability) for AMD
# Install with one of:
# pip install "amd-sglang[srt_hip,rocm700]"
# pip install "amd-sglang[srt_hip,rocm720]"
srt_hip = [
"amd-sglang[runtime_common]",
"petit_kernel==0.0.2",
"wave-lang==3.8.2",
]

diffusion_hip = [
"PyYAML==6.0.1",
"cloudpickle",
"diffusers==0.37.0",
"imageio==2.36.0",
"imageio-ffmpeg==0.5.1",
"moviepy>=2.0.0",
"opencv-python-headless==4.10.0.84",
"remote-pdb",
"st_attn==0.0.7",
"vsa==0.0.4",
"runai_model_streamer>=0.15.5",
"cache-dit==1.1.8",
"addict",
]

# For Intel Gaudi(device : hpu) follow the installation guide
# https://docs.vllm.ai/en/latest/getting_started/gaudi-installation.html
srt_hpu = ["sglang[runtime_common]"]

# https://docs.sglang.io/platforms/mthreads_gpu.md
srt_musa = [
"sglang[runtime_common]",
"torch",
"torch_musa",
"torchada>=0.1.45",
"mthreads-ml-py",
"numpy<2.0",
]

diffusion_musa = [
diffusion_common = [
"PyYAML==6.0.1",
"cloudpickle",
"diffusers==0.37.0",
Expand All @@ -137,11 +97,10 @@ diffusion_musa = [
"moviepy>=2.0.0",
"opencv-python-headless==4.10.0.84",
"remote-pdb",
"st_attn==0.0.7",
"vsa==0.0.4",
"runai_model_streamer>=0.15.5",
"cache-dit==1.1.8",
"addict",
"scikit-image==0.25.2",
"trimesh>=4.0.0",
"xatlas",
]

tracing = [
Expand All @@ -151,33 +110,53 @@ tracing = [
"opentelemetry-exporter-otlp-proto-grpc",
]

# HIP (Heterogeneous-computing Interface for Portability) for AMD
# Install with one of:
# pip install "amd-sglang[srt_hip,rocm700]"
# pip install "amd-sglang[srt_hip,rocm720]"
srt_hip = [
"amd-sglang[runtime_common]",
"petit_kernel==0.0.2",
"wave-lang==3.8.2",
]

diffusion_hip = [
"amd-sglang[diffusion_common]",
"peft>=0.18.0",
"st_attn==0.0.7",
"vsa==0.0.4",
"runai_model_streamer>=0.15.7",
"cache-dit==1.3.0",
]

test = [
"accelerate",
"addict",
"bitsandbytes",
"expecttest",
"gguf",
"jsonlines",
"matplotlib",
"pandas",
"peft",
"parameterized",
"peft>=0.18.0",
"polars",
"pytest",
"pytest-cov",
"diff-cover",
"sentence_transformers",
"tabulate",
]

all_hip = ["amd-sglang[srt_hip]", "amd-sglang[diffusion_hip]"]
all_hpu = ["sglang[srt_hpu]"]
all_musa = ["sglang[srt_musa]", "sglang[diffusion_musa]"]

all_hip = ["amd-sglang[srt_hip]", "amd-sglang[diffusion_hip]", "amd-sglang[tracing]"]
dev_hip = ["amd-sglang[all_hip]", "amd-sglang[test]"]
dev_hpu = ["sglang[all_hpu]", "sglang[test]"]
dev_musa = ["sglang[all_musa]", "sglang[test]"]

[project.urls]
"Homepage" = "https://github.com/sgl-project/sglang"
"Bug Tracker" = "https://github.com/sgl-project/sglang/issues"

[project.scripts]
sglang = "sglang.cli.main:main"
killall_sglang = "sglang.cli.killall:main"

[tool.setuptools.package-data]
"sglang" = [
Expand Down
Loading