Skip to content

Commit 418ef1e

Browse files
mmangkad, Kangyan-Zhou
authored and committed
[FlashInfer] Bump FlashInfer version from 0.6.3 to 0.6.4 (sgl-project#19005)
1 parent 8c3fd6c commit 418ef1e

7 files changed

Lines changed: 12 additions & 15 deletions

File tree

.github/workflows/release-docker-cu13-framework.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@ on:
1010
description: "Version to build (without v prefix, e.g., 0.5.8)"
1111
required: true
1212
flashinfer_version:
13-
description: "FlashInfer version (default: 0.6.3)"
13+
description: "FlashInfer version (default: 0.6.4)"
1414
required: false
15-
default: "0.6.3"
15+
default: "0.6.4"
1616

1717
jobs:
1818
publish-x86:

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ ARG PIP_DEFAULT_INDEX
1919
ARG UBUNTU_MIRROR
2020
ARG GITHUB_ARTIFACTORY=github.com
2121
ARG INSTALL_FLASHINFER_JIT_CACHE=0
22-
ARG FLASHINFER_VERSION=0.6.3
22+
ARG FLASHINFER_VERSION=0.6.4
2323
ARG MOONCAKE_VERSION=0.3.9
2424
#if need other arg please add in MOONCAKE_COMPILE_ARG
2525
ARG MOONCAKE_COMPILE_ARG="-DUSE_HTTP=ON -DUSE_MNNVL=ON -DUSE_CUDA=ON -DWITH_EP=ON"

python/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,8 +27,8 @@ dependencies = [
2727
"datasets",
2828
"einops",
2929
"fastapi",
30-
"flashinfer_python==0.6.3", # keep it aligned with jit-cache version in Dockerfile
31-
"flashinfer_cubin==0.6.3",
30+
"flashinfer_python==0.6.4", # keep it aligned with jit-cache version in Dockerfile
31+
"flashinfer_cubin==0.6.4",
3232
"gguf",
3333
"hf_transfer",
3434
"huggingface_hub",

python/sglang/srt/entrypoints/engine.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -879,7 +879,7 @@ def _set_envs_and_config(server_args: ServerArgs):
879879
if server_args.attention_backend == "flashinfer":
880880
assert_pkg_version(
881881
"flashinfer_python",
882-
"0.6.3",
882+
"0.6.4",
883883
"Please uninstall the old version and "
884884
"reinstall the latest version by following the instructions "
885885
"at https://docs.flashinfer.ai/installation.html.",

python/sglang/srt/server_args.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@
3535
from sglang.srt.utils.common import (
3636
LORA_TARGET_ALL_MODULES,
3737
SUPPORTED_LORA_TARGET_MODULES,
38-
check_pkg_version_at_least,
3938
configure_ipv6,
4039
cpu_has_amx_support,
4140
get_bool_env_var,
@@ -1745,12 +1744,10 @@ def _handle_model_specific_adjustments(self):
17451744
and self.moe_a2a_backend == "none"
17461745
and self.moe_runner_backend == "auto"
17471746
):
1748-
# Only enable flashinfer_trtllm if flashinfer-python version is >= 0.6.3
1749-
if check_pkg_version_at_least("flashinfer-python", "0.6.3"):
1750-
self.moe_runner_backend = "flashinfer_trtllm"
1751-
logger.info(
1752-
"Use flashinfer_trtllm as MoE runner backend on sm100 for Glm4MoeForCausalLM"
1753-
)
1747+
self.moe_runner_backend = "flashinfer_trtllm"
1748+
logger.info(
1749+
"Use flashinfer_trtllm as MoE runner backend on sm100 for Glm4MoeForCausalLM"
1750+
)
17541751

17551752
elif model_arch in [
17561753
"FalconH1ForCausalLM",

python/sglang/srt/utils/common.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1114,7 +1114,7 @@ def check_pkg_version_at_least(pkg: str, min_version: str) -> bool:
11141114
11151115
Args:
11161116
pkg: Package name (distribution name, e.g., "flashinfer-python")
1117-
min_version: Minimum version required (e.g., "0.6.3")
1117+
min_version: Minimum version required (e.g., "0.6.4")
11181118
11191119
Returns:
11201120
True if package is installed and version >= min_version, False otherwise

scripts/ci/cuda/ci_install_dependency.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ set -euxo pipefail
55
# Set up environment variables
66
IS_BLACKWELL=${IS_BLACKWELL:-0}
77
CU_VERSION="cu129"
8-
FLASHINFER_VERSION=0.6.3
8+
FLASHINFER_VERSION=0.6.4
99
OPTIONAL_DEPS="${1:-}"
1010

1111
# Detect system architecture

0 commit comments

Comments
 (0)