Skip to content

Commit 5e0ada5

Browse files
[Bugfix] Fix TypeError when attn_metadata is None (#5038)
### What this PR does / why we need it? Fixes the bug `TypeError: 'NoneType' object is not iterable` in vllm_ascend/compilation/acl_graph.py. The root cause is that attn_metadata is None in the dummy_run of MTP. - vLLM version: v0.12.0 - vLLM main: vllm-project/vllm@ad32e3e Signed-off-by: chenmenglong <[email protected]>
1 parent d43cabc commit 5e0ada5

File tree

6 files changed

+8
-10
lines changed

6 files changed

+8
-10
lines changed

vllm_ascend/spec_decode/eagle_proposer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def load_model(self, model: nn.Module) -> None:
117117
def dummy_run(self,
118118
num_tokens: int,
119119
with_prefill: bool = False,
120-
skip_attn: bool = False,
120+
in_graph_capturing: bool = False,
121121
num_reqs: int = 0,
122122
num_tokens_across_dp: Optional[torch.Tensor] = None,
123123
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,

vllm_ascend/spec_decode/interface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def load_model(self, model):
3232
def dummy_run(self,
3333
num_tokens: int,
3434
with_prefill: bool = False,
35-
skip_attn: bool = False,
35+
in_graph_capturing: bool = False,
3636
num_reqs: int = 0,
3737
num_tokens_across_dp: Optional[torch.Tensor] = None,
3838
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def load_model(self, model) -> None:
223223
def dummy_run(self,
224224
num_tokens: int,
225225
with_prefill: bool = False,
226-
skip_attn: bool = False,
226+
in_graph_capturing: bool = False,
227227
num_reqs: int = 0,
228228
num_tokens_across_dp=None,
229229
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
@@ -247,9 +247,7 @@ def dummy_run(self,
247247
moe_comm_type = (MoECommType.ALLTOALL if moe_comm_type
248248
== MoECommType.FUSED_ALLTOALL else moe_comm_type)
249249

250-
if skip_attn:
251-
attn_metadata = None
252-
elif aclgraph_runtime_mode == CUDAGraphMode.FULL:
250+
if aclgraph_runtime_mode == CUDAGraphMode.FULL:
253251
if len(self.runner.attn_groups) > 0:
254252
num_computed_tokens_cpu = (
255253
self.runner.input_batch.
@@ -294,7 +292,7 @@ def dummy_run(self,
294292
positions = self.positions[:num_tokens]
295293
previous_hidden_states = self.hidden_states[:num_tokens]
296294
for i in range(self.num_speculative_tokens):
297-
if i > 0 and not skip_attn and aclgraph_runtime_mode == CUDAGraphMode.FULL:
295+
if i > 0 and not in_graph_capturing and aclgraph_runtime_mode == CUDAGraphMode.FULL:
298296
aclgraph_runtime_mode = CUDAGraphMode.NONE
299297
with set_ascend_forward_context(
300298
attn_metadata,

vllm_ascend/spec_decode/ngram_proposer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def load_model(self, *args, **kwargs):
2222
def dummy_run(self,
2323
num_tokens,
2424
with_prefill=None,
25-
skip_attn=None,
25+
in_graph_capturing=None,
2626
num_reqs=None,
2727
num_tokens_across_dp=None,
2828
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,

vllm_ascend/spec_decode/suffix_proposer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def load_model(self, *args, **kwargs):
2222
def dummy_run(self,
2323
num_tokens,
2424
with_prefill=None,
25-
skip_attn=None,
25+
in_graph_capturing=None,
2626
num_reqs=None,
2727
num_tokens_across_dp=None,
2828
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2296,7 +2296,7 @@ def dummy_drafter_compute_logits(hidden_states):
22962296
aclgraph_runtime_mode=aclgraph_runtime_mode,
22972297
batch_descriptor=batch_descriptor,
22982298
dummy_compute_logits=dummy_drafter_compute_logits,
2299-
skip_attn=not force_attention)
2299+
in_graph_capturing=not force_attention)
23002300
if self.in_profile_run and self.dynamic_eplb:
23012301
self.model.clear_all_moe_loads()
23022302
if not self.in_profile_run and self.dynamic_eplb:

0 commit comments

Comments
 (0)