Skip to content

Commit 2f0110b

Browse files
committed
fix
Signed-off-by: wangli <[email protected]>
1 parent 0a8b56e commit 2f0110b

File tree

4 files changed

+7
-382
lines changed

4 files changed

+7
-382
lines changed

tests/ut/worker/test_input_batch.py

Lines changed: 0 additions & 375 deletions
This file was deleted.

vllm_ascend/attention/mla_v1.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@
4141
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
4242
flashcomm2_o_shared_enabled, is_enable_nz,
4343
weak_ref_tensors)
44-
from vllm_ascend.worker.npu_input_batch import InputBatch
44+
from vllm_ascend.worker.npu_input_batch import NPUInputBatch
4545

4646
if TYPE_CHECKING:
4747
from vllm.v1.core.sched.output import SchedulerOutput
@@ -280,7 +280,7 @@ def __init__(self,
280280
dtype=torch.uint8,
281281
device=device)
282282

283-
def reorder_batch(self, input_batch: "InputBatch",
283+
def reorder_batch(self, input_batch: "NPUInputBatch",
284284
scheduler_output: "SchedulerOutput") -> bool:
285285
# We now want to reorder the batch so that the "decode" requests are at
286286
# the front and the "prefill" requests are at the back using the least amount

0 commit comments

Comments
 (0)