Skip to content

Commit a547fe3

Browse files
fix: sync with optimum-rbln fix (#15)
* fix: batch 1 case * ruff
1 parent a1235ee commit a547fe3

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

vllm_rbln/worker/optimum_worker.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
7979
if attn_impl is not None and attn_impl == "flash_attn":
8080
# We use the last block as dummy block
8181
num_gpu_blocks = (
82-
self.model_runner.model.model.get_kvcache_num_blocks() - 1)
82+
self.model_runner.model.model.get_kvcache_num_blocks() - 1) \
83+
if self.model_runner.model.model.rbln_config.batch_size > 1 \
84+
else (self.model_runner.model.model.get_kvcache_num_blocks())
8385

8486
if npu_num_blocks := os.environ.get("VLLM_RBLN_NPU_NUM_BLOCKS"):
8587
num_gpu_blocks = int(npu_num_blocks) - 1

0 commit comments

Comments
 (0)