Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/release-docker-cu13-framework.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ on:
description: "Version to build (without v prefix, e.g., 0.5.8)"
required: true
flashinfer_version:
description: "FlashInfer version (default: 0.6.3)"
description: "FlashInfer version (default: 0.6.4)"
required: false
default: "0.6.3"
default: "0.6.4"

jobs:
publish-x86:
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ARG PIP_DEFAULT_INDEX
ARG UBUNTU_MIRROR
ARG GITHUB_ARTIFACTORY=github.com
ARG INSTALL_FLASHINFER_JIT_CACHE=0
ARG FLASHINFER_VERSION=0.6.3
ARG FLASHINFER_VERSION=0.6.4
ARG MOONCAKE_VERSION=0.3.9
# If other args are needed, please add them to MOONCAKE_COMPILE_ARG
ARG MOONCAKE_COMPILE_ARG="-DUSE_HTTP=ON -DUSE_MNNVL=ON -DUSE_CUDA=ON -DWITH_EP=ON"
Expand Down
4 changes: 2 additions & 2 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ dependencies = [
"datasets",
"einops",
"fastapi",
"flashinfer_python==0.6.3", # keep it aligned with jit-cache version in Dockerfile
"flashinfer_cubin==0.6.3",
"flashinfer_python==0.6.4", # keep it aligned with jit-cache version in Dockerfile
"flashinfer_cubin==0.6.4",
"gguf",
"hf_transfer",
"huggingface_hub",
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/entrypoints/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if server_args.attention_backend == "flashinfer":
assert_pkg_version(
"flashinfer_python",
"0.6.3",
"0.6.4",
"Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
Expand Down
4 changes: 2 additions & 2 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,8 +1618,8 @@ def _handle_model_specific_adjustments(self):
and self.moe_a2a_backend == "none"
and self.moe_runner_backend == "auto"
):
# Only enable flashinfer_trtllm if flashinfer-python version is >= 0.6.3
if check_pkg_version_at_least("flashinfer-python", "0.6.3"):
Comment thread
mmangkad marked this conversation as resolved.
# Only enable flashinfer_trtllm if flashinfer-python version is >= 0.6.4
if check_pkg_version_at_least("flashinfer-python", "0.6.4"):
self.moe_runner_backend = "flashinfer_trtllm"
logger.info(
"Use flashinfer_trtllm as MoE runner backend on sm100 for Glm4MoeForCausalLM"
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,7 @@ def check_pkg_version_at_least(pkg: str, min_version: str) -> bool:

Args:
pkg: Package name (distribution name, e.g., "flashinfer-python")
min_version: Minimum version required (e.g., "0.6.3")
min_version: Minimum version required (e.g., "0.6.4")

Returns:
True if package is installed and version >= min_version, False otherwise
Expand Down
2 changes: 1 addition & 1 deletion scripts/ci/cuda/ci_install_dependency.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euxo pipefail
# Set up environment variables
IS_BLACKWELL=${IS_BLACKWELL:-0}
CU_VERSION="cu129"
FLASHINFER_VERSION=0.6.3
FLASHINFER_VERSION=0.6.4
OPTIONAL_DEPS="${1:-}"

# Detect system architecture
Expand Down
Loading