baidu · liwei109 · May 19, 2026 · May 18, 2026 · May 19, 2026
diff --git a/vllm_kunlun/ops/fla/utils.py b/vllm_kunlun/ops/fla/utils.py
@@ -121,12 +121,12 @@ def get_available_device() -> str:
 
 
 @functools.cache
-def _check_platform() -> Literal["nvidia", "amd", "intel", "musa"]:
+def _check_platform() -> Literal["nvidia", "amd", "kunlun"]:
     device = get_available_device()
     mapping = {
         "cuda": "nvidia",
         "hip": "amd",
-        "xpu": "intel",
+        "xpu": "kunlun",
     }
     # return the mapped value, or the original if not found
     return mapping.get(device, device)

diff --git a/vllm_kunlun/v1/attention/backends/kunlun_attn.py b/vllm_kunlun/v1/attention/backends/kunlun_attn.py
@@ -16,7 +16,6 @@
 #
 import copy
 from dataclasses import dataclass
-from itertools import accumulate
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -603,13 +602,6 @@ def build(
         seq_lens = common_attn_metadata.seq_lens
         seq_lens_cpu = common_attn_metadata.seq_lens_cpu
 
-        seq_start_loc = list(accumulate(seq_lens, initial=0))
-
-        seq_start_loc_tensor = torch.empty(
-            len(seq_start_loc), dtype=torch.int32, device=self.device
-        )
-        seq_start_loc_tensor.copy_(torch.as_tensor(seq_start_loc, dtype=torch.int32))
-
         kv_lod_cpu = torch.zeros(num_reqs + 1, dtype=torch.int32, device="cpu")
         kv_lod_cpu[1:] = seq_lens_cpu.to(torch.int32).cumsum(dim=0)
         kv_lod_xpu = kv_lod_cpu.to(self.device)

diff --git a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py
@@ -169,8 +169,7 @@ def random_sample(
             q = q.clamp(min=1e-12)
         else:
             for i, generator in generators.items():
-                q[i].exponential_(generator=generator)
-
+                torch.ops.xspeedgate_ops.inplace_exponential(q[i], generator=generator)
     return probs.div_(q).argmax(dim=-1).view(-1)