diff --git a/vllm_rbln/v1/worker/rbln_model_runner.py b/vllm_rbln/v1/worker/rbln_model_runner.py index e7c0b49ed..4f62256a1 100644 --- a/vllm_rbln/v1/worker/rbln_model_runner.py +++ b/vllm_rbln/v1/worker/rbln_model_runner.py @@ -3391,7 +3391,7 @@ def _allocate_kv_cache_tensors( for kv_cache_tensor in kv_cache_config.kv_cache_tensors: tensor = torch.zeros(kv_cache_tensor.size, dtype=torch.int8, - device="cpu") + device="meta") for layer_name in kv_cache_tensor.shared_by: kv_cache_raw_tensors[layer_name] = tensor