Skip to content

Commit bab39ce

Browse files
rebel-jinhwan and claude
authored and committed
compat(dp,kv): drop removed DPMetadata.max_tokens_across_dp_cpu; add apply_admission_cap param
- vLLM 0.22 removed DPMetadata.max_tokens_across_dp_cpu; the max is derivable from num_tokens_across_dp_cpu and has no consumers here.
- RBLNSlidingWindowManager: accept apply_admission_cap kwarg.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent b4fe0af commit bab39ce

2 files changed

Lines changed: 1 addition & 4 deletions

File tree

vllm_rbln/forward_context.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,6 @@ def make(
166166
num_tokens_across_dp_cpu = num_tokens_across_dp
167167
max_pad = num_padded_tokens
168168

169-
max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp_cpu)
170169
max_pads_across_dp = torch.empty(max_pad, device="cpu")
171170
else:
172171
assert num_tokens_across_dp is None, (
@@ -178,11 +177,9 @@ def make(
178177
num_tokens_across_dp_cpu = torch.tensor(
179178
[num_tokens], device="cpu", dtype=torch.int32
180179
)
181-
max_tokens_across_dp_cpu = num_tokens
182180
max_pads_across_dp = None
183181

184182
return RBLNDPMetadata(
185-
max_tokens_across_dp_cpu,
186183
num_tokens_across_dp_cpu,
187184
max_pads_across_dp=max_pads_across_dp,
188185
)
@@ -221,7 +218,6 @@ def _set_forward_context(
221218
num_padded_tokens,
222219
)
223220

224-
# NOTE: vLLM v0.22 removed the virtual_engine parameter
225221
forward_context = create_forward_context(
226222
attn_metadata,
227223
vllm_config,

vllm_rbln/v1/kv_cache.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ def get_num_blocks_to_allocate(
5353
new_computed_blocks: Sequence[KVCacheBlock],
5454
total_computed_tokens: int,
5555
num_tokens_main_model: int,
56+
apply_admission_cap: bool = False,
5657
) -> int:
5758
return 0 if self.req_to_blocks[request_id] else 1
5859

0 commit comments

Comments
 (0)