
Commit ca63587

Minor fix

Signed-off-by: yizhang-nv <187001205+yizhang-nv@users.noreply.github.com>

1 parent e76e4f7

File tree (2 files changed, +3 −3 lines):

tensorrt_llm/_torch/attention_backend/trtllm.py
tensorrt_llm/_torch/pyexecutor/resource_manager.py

tensorrt_llm/_torch/attention_backend/trtllm.py

Lines changed: 0 additions & 1 deletion

```diff
@@ -726,7 +726,6 @@ def _post_init_with_buffers(self, buffers) -> None:
             capture_graph=capture_graph,
         )
         self.host_kv_cache_block_offsets = self.kv_cache_manager.host_kv_cache_block_offsets
-        assert self.host_kv_cache_block_offsets.shape == self.kv_cache_block_offsets.shape, f"host_kv_cache_block_offsets and kv_cache_block_offsets should have the same shape, but got {self.host_kv_cache_block_offsets.shape} and {self.kv_cache_block_offsets.shape}"
         self.block_ids_per_seq = None
         self.kv_block_ids_per_seq = None
         if self.enable_flash_mla:
```
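The commit message does not say why the assertion was dropped; a plausible reading (my assumption, not stated in the commit) is that the resource-manager change below now allocates the host-side offsets buffer with one extra sequence slot for the CUDA graph dummy request, so the host and device tensors can no longer be expected to have identical shapes. A minimal sketch of that mismatch, using hypothetical sizes:

```python
import torch

# Hypothetical sizes; only the sequence dimension differs by the extra slot.
num_pools, max_batch_size, max_beam_width, max_blocks_per_seq = 1, 8, 1, 64

# Device-side buffer, assumed to stay at max_batch_size * max_beam_width rows.
kv_cache_block_offsets = torch.empty(
    num_pools, max_batch_size * max_beam_width, 2, max_blocks_per_seq,
    dtype=torch.int32)

# Host-side buffer after this commit: one extra row for the dummy request.
host_kv_cache_block_offsets = torch.empty(
    num_pools, (max_batch_size + 1) * max_beam_width, 2, max_blocks_per_seq,
    dtype=torch.int32)

# The removed assertion compared exactly these two shapes and would now fail.
print(host_kv_cache_block_offsets.shape == kv_cache_block_offsets.shape)  # False
```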

tensorrt_llm/_torch/pyexecutor/resource_manager.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -1506,11 +1506,12 @@ def append_to_kv_heads_per_layer(num_kv_heads_per_layer: List[int],
 
         self.enable_block_reuse = kv_cache_config.enable_block_reuse
 
-        self.index_mapper = IndexMapper(max_batch_size, max_beam_width)
+        # Plus 1 for cuda graph dummy request
+        self.index_mapper = IndexMapper(max_batch_size + 1, max_beam_width)
 
         self.host_kv_cache_block_offsets = torch.empty(
             self.num_pools,
-            max_batch_size * max_beam_width,
+            (max_batch_size + 1) * max_beam_width,
             2,  # key and value
             self.max_blocks_per_seq,
             dtype=torch.int32,
```
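The "+ 1" in both the IndexMapper capacity and the host_kv_cache_block_offsets allocation reserves one sequence slot beyond max_batch_size for the CUDA graph dummy request mentioned in the new comment. A minimal sketch of the capacity arithmetic, with hypothetical values (the helper below is illustrative, not the repository's API):

```python
def offsets_rows(max_batch_size: int, max_beam_width: int) -> int:
    """Rows needed in the block-offsets buffer: each real request can use up to
    max_beam_width beams, and one extra sequence slot is kept for the CUDA graph
    dummy request (assumed to be the padding request used while capturing graphs)."""
    return (max_batch_size + 1) * max_beam_width

# With the old sizing of max_batch_size * max_beam_width, a dummy request on
# top of a full batch would not have had a slot of its own.
assert offsets_rows(8, 1) == 9    # old sizing gave 8 rows
assert offsets_rows(4, 2) == 10   # old sizing gave 8 rows
```

Sizing both structures with the same capacity keeps the dummy request's slot in bounds for the index mapper and the offsets buffer alike.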
