baidu · Lidang-Jiang · Apr 10, 2026 · Apr 21, 2026 · Copilot · Apr 14, 2026
diff --git a/.github/workflows/ut.yml b/.github/workflows/ut.yml
@@ -40,7 +40,7 @@
 
 #       - name: Install vLLM
 #         run: |
-#           pip install vllm==0.11.0 --no-build-isolation --no-deps --no-deps --index-url https://pip.baidu-int.com/simple/
+#           pip install vllm==0.19.0 --no-build-isolation --no-deps --index-url https://pip.baidu-int.com/simple/
 
 #       - name: Run Unit Test
 #         run: |

diff --git a/README.md b/README.md
@@ -197,7 +197,7 @@ Please use the following recommended versions to get started quickly:
 
 | Version | Release type | Doc |
 |----------|---------------|-----|
-| v0.15.1 | Latest development version | [QuickStart](https://vllm-kunlun.readthedocs.io/en/latest/quick_start.html) and [Installation](https://vllm-kunlun.readthedocs.io/en/latest/installation.html) for more details |
+| v0.19.0 | Target development version | [QuickStart](https://vllm-kunlun.readthedocs.io/en/latest/quick_start.html) and [Installation](https://vllm-kunlun.readthedocs.io/en/latest/installation.html) for more details |
 
 ---
 

diff --git a/ci/scripts/docker/start_docker.sh b/ci/scripts/docker/start_docker.sh
@@ -76,7 +76,7 @@ docker run \
   --name="${DOCKER_NAME}" \
   -v /home:/home \
   -v "${WORKSPACE_MOUNT}" \
-  -v /ssd2:/ssd2 \
+  -v /ssd1:/ssd1 \
   -v /ssd1:/ssd1 \
-  -v /ssd1:/ssd1 \
-  -v /ssd1:/ssd1 \
   -v /ssd3:/ssd3 \
   -v /dev/shm:/dev/shm \

diff --git a/ci/scripts/env/install_env.sh b/ci/scripts/env/install_env.sh
@@ -71,6 +71,7 @@ docker exec "${DOCKER_NAME}" bash -lc "
   pip install \
     \"https://baidu-kunlun-public.su.bcebos.com/v1/baidu-kunlun-share/1130/xtorch_ops-0.1.2209%2B6752ad20-cp310-cp310-linux_x86_64.whl?authorization=bce-auth-v1%2FALTAKypXxBzU7gg4Mk4K4c6OYR%2F2025-12-05T06%3A18%3A00Z%2F-1%2Fhost%2F14936c2b7e7c557c1400e4c467c79f7a9217374a7aa4a046711ac4d948f460cd\"
 
+  # TODO: replace this transitional Triton wheel (still fetched from the vllm-kunlun-0.11.0 path) once the 0.19.0 artifact is published.
   pip install \
     \"https://cce-ai-models.bj.bcebos.com/v1/vllm-kunlun-0.11.0/triton-3.0.0%2Bb2cde523-cp310-cp310-linux_x86_64.whl\"
 
@@ -87,14 +88,14 @@ docker exec "${DOCKER_NAME}" bash -lc "
   source \"${GITHUB_WORKSPACE}/vLLM-Kunlun/setup_env.sh\"
 
   ########################################
-  # 3. Install upstream vLLM 0.11.0
+  # 3. Install upstream vLLM 0.19.0
   ########################################
-  echo '===== Installing vLLM==0.11.0 ====='
+  echo '===== Installing vLLM==0.19.0 ====='
 
   pip uninstall -y vllm || true
   env | grep -i proxy || true
 
-  pip install vllm==0.11.0 \
+  pip install vllm==0.19.0 \
     --no-build-isolation \
     --no-deps \
     --index-url https://pip.baidu-int.com/simple/

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -65,17 +65,17 @@
     # the branch of vllm, used in vllm clone
     # - main branch: 'main'
     # - vX.Y.Z branch: 'vX.Y.Z'
-    "vllm_version": "v0.15.1",
+    "vllm_version": "v0.19.0",
     # the branch of vllm-kunlun, used in vllm-kunlun clone and image tag
     # - main branch: 'main'
     # - vX.Y.Z branch: latest vllm-kunlun release tag
-    "vllm_kunlun_version": "v0.15.1",
+    "vllm_kunlun_version": "main",
     # the newest release version of vllm-kunlun and matched vLLM, used in pip install.
     # This value should be updated when cut down release.
-    "pip_vllm_kunlun_version": "0.15.1",
-    "pip_vllm_version": "0.15.1",
+    "pip_vllm_kunlun_version": "0.19.0",
+    "pip_vllm_version": "0.19.0",
     # vllm version in ci
-    "ci_vllm_version": "v0.15.1",
+    "ci_vllm_version": "v0.19.0",
 }
 
 # For cross-file header anchors
@@ -116,7 +116,7 @@
 html_theme_options = {
     "path_to_docs": "docs/source",
     "repository_url": "https://github.com/baidu/vLLM-Kunlun",
-    "repository_branch": "v0.15.1-dev",
+    "repository_branch": "main",
     "use_repository_button": True,
     "use_edit_page_button": True,
 }

diff --git a/docs/source/faqs.md b/docs/source/faqs.md
@@ -2,7 +2,7 @@
 
 ## Version Specific FAQs
 
-- [[v0.15.1] FAQ & Feedback]
+- [[v0.19.0] FAQ & Feedback]
 
 ## General FAQs
 
@@ -24,7 +24,7 @@ We will support the kunlun4 M100 platform in early 2026.
 
 ### 3. How vllm-kunlun work with vLLM?
 
-vllm-kunlun is a hardware plugin for vLLM. Basically, the version of vllm-kunlun is the same as the version of vllm. For example, if you use vllm 0.15.1, you should use vllm-kunlun 0.15.1 as well. For main branch, we will make sure `vllm-kunlun` and `vllm` are compatible by each commit.
+vllm-kunlun is a hardware plugin for vLLM. The version of vllm-kunlun matches the version of vLLM it targets: for example, if you use vLLM 0.19.0, you should use vllm-kunlun 0.19.0 as well. On the main branch, we make sure `vllm-kunlun` and `vllm` stay compatible at every commit.
 
 ### 4. How to handle the out-of-memory issue?
 

diff --git a/docs/source/installation.md b/docs/source/installation.md
@@ -53,10 +53,10 @@ docker run -itd ${DOCKER_DEVICE_CONFIG} \
 ::::
 :::::
 ## Install vLLM-kunlun
-### Install vLLM 0.15.1
+### Install vLLM 0.19.0
 
 ```
-uv pip install vllm==0.15.1 --no-build-isolation --no-deps
+uv pip install vllm==0.19.0 --no-build-isolation --no-deps
 ```
 
 ### Build and Install
@@ -67,13 +67,15 @@ git clone https://github.com/baidu/vLLM-Kunlun
 
 cd vLLM-Kunlun
 
-git checkout v0.15.1-dev
+git checkout main
 
 uv pip install -r requirements.txt
 
 python setup.py build
 
 python setup.py install
+
+python vllm_kunlun/patches/patch_torch251.py
 ```
 
 ### Replace eval_frame.py

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,27 +4,32 @@ build-backend = "hatchling.build"
 
 [project]
 name = "vllm-kunlun"
-version = "0.15.1.dev0"
+version = "0.19.0"
 description = "vLLM Kunlun3 backend plugin"
 readme = "README.md"
 requires-python = ">=3.10"
 license = { text = "MIT" }
 authors = [{ name = "kunlun"}]
 dependencies = []
 
-[project.scripts]
-vllm-kunlun = "vllm_kunlun.cmdline:main"
-
 [project.entry-points."vllm.platform_plugins"]
 kunlun = "vllm_kunlun:register"
 
 [project.entry-points."vllm.general_plugins"]
 kunlun_model = "vllm_kunlun:register_model"
+kunlun_quant = "vllm_kunlun.models:register_quant_method"
+
+[project.entry-points."vllm.plugins"]
+kunlun_fused_moe = "vllm_kunlun.ops.fused_moe:register_kunlun_fused_moe_ops"
 
 [tool.hatch.build]
 packages = ["vllm_kunlun"]
 include = ["vllm_kunlun/conf/*", "vllm_kunlun/data/*"]
 
+[tool.hatch.build.targets.sdist]
+artifacts = ["vllm_kunlun/*.so"]
+
 [tool.hatch.build.targets.wheel]
 packages = ["vllm_kunlun"]
+artifacts = ["vllm_kunlun/*.so"]
 output-dir = "output/dist"
diff --git a/setup.py b/setup.py
@@ -30,8 +30,9 @@ def run(self):
         for ext in self.extensions:
             ext_path = self.get_ext_fullpath(ext.name)
             file_name = os.path.basename(ext_path)
-            target_path = os.path.join("vllm_kunlun", file_name)
+            target_path = os.path.join(ROOT_DIR, "vllm_kunlun", file_name)
 
+            os.makedirs(os.path.dirname(target_path), exist_ok=True)
             if os.path.exists(target_path):
                 os.remove(target_path)
             shutil.copyfile(ext_path, target_path)
@@ -42,12 +43,12 @@ def run(self):
 
     setup(
         name="vllm_kunlun",
-        version="0.15.1",
+        version="0.19.0",
         author="vLLM-Kunlun team",
         license="Apache 2.0",
         description="vLLM Kunlun3 backend plugin",
         packages=find_packages(exclude=("docs", "examples", "tests*")),
-        package_data={"vllm_kunlun": ["_kunlun.so", "so/*.so", "include/*.h"]},
+        package_data={"vllm_kunlun": ["_kunlun*.so", "so/*.so", "include/*.h"]},
         python_requires=">=3.10",
         ext_modules=ext_modules,
         cmdclass={
@@ -63,6 +64,5 @@ def run(self):
             "vllm.plugins": [
                 "kunlun_fused_moe = vllm_kunlun.ops.fused_moe:register_kunlun_fused_moe_ops"
             ],
-            "console_scripts": ["vllm_kunlun = vllm_kunlun.entrypoints.main:main"],
         },
     )