Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/release-docker-cu13-framework.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ on:
description: "Version to build (without v prefix, e.g., 0.5.8)"
required: true
flashinfer_version:
description: "FlashInfer version (default: 0.6.3)"
description: "FlashInfer version (default: 0.6.4)"
required: false
default: "0.6.3"
default: "0.6.4"

jobs:
publish-x86:
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ARG PIP_DEFAULT_INDEX
ARG UBUNTU_MIRROR
ARG GITHUB_ARTIFACTORY=github.com
ARG INSTALL_FLASHINFER_JIT_CACHE=0
ARG FLASHINFER_VERSION=0.6.3
ARG FLASHINFER_VERSION=0.6.4
ARG MOONCAKE_VERSION=0.3.9
# If other build args are needed, add them to MOONCAKE_COMPILE_ARG
ARG MOONCAKE_COMPILE_ARG="-DUSE_HTTP=ON -DUSE_MNNVL=ON -DUSE_CUDA=ON -DWITH_EP=ON"
Expand Down
4 changes: 2 additions & 2 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ dependencies = [
"datasets",
"einops",
"fastapi",
"flashinfer_python==0.6.3", # keep it aligned with jit-cache version in Dockerfile
"flashinfer_cubin==0.6.3",
"flashinfer_python==0.6.4", # keep it aligned with jit-cache version in Dockerfile
"flashinfer_cubin==0.6.4",
"gguf",
"hf_transfer",
"huggingface_hub",
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/entrypoints/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ def _set_envs_and_config(server_args: ServerArgs):
if server_args.attention_backend == "flashinfer":
assert_pkg_version(
"flashinfer_python",
"0.6.3",
"0.6.4",
"Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
Expand Down
4 changes: 2 additions & 2 deletions python/sglang/srt/server_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -1614,8 +1614,8 @@ def _handle_model_specific_adjustments(self):
and self.moe_a2a_backend == "none"
and self.moe_runner_backend == "auto"
):
# Only enable flashinfer_trtllm if flashinfer-python version is >= 0.6.3
if check_pkg_version_at_least("flashinfer-python", "0.6.3"):
# Only enable flashinfer_trtllm if flashinfer-python version is >= 0.6.4
if check_pkg_version_at_least("flashinfer-python", "0.6.4"):
self.moe_runner_backend = "flashinfer_trtllm"
logger.info(
"Use flashinfer_trtllm as MoE runner backend on sm100 for Glm4MoeForCausalLM"
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,7 @@ def check_pkg_version_at_least(pkg: str, min_version: str) -> bool:

Args:
pkg: Package name (distribution name, e.g., "flashinfer-python")
min_version: Minimum version required (e.g., "0.6.3")
min_version: Minimum version required (e.g., "0.6.4")

Returns:
True if package is installed and version >= min_version, False otherwise
Expand Down
2 changes: 1 addition & 1 deletion scripts/ci/cuda/ci_install_dependency.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euxo pipefail
# Set up environment variables
IS_BLACKWELL=${IS_BLACKWELL:-0}
CU_VERSION="cu129"
FLASHINFER_VERSION=0.6.3
FLASHINFER_VERSION=0.6.4
OPTIONAL_DEPS="${1:-}"

# Detect system architecture
Expand Down
Loading