We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 575a316, commit 5bc4f5c (Copy full SHA for 5bc4f5c)
1 file changed
vllm_rbln/worker/utils.py
@@ -78,6 +78,10 @@ def align_2MB(x: int) -> int:
78
available_dram = tensor_parallel_size * (ATOM_DRAM_NBYTES -
79
ATOM_SYS_DRAM_NBYTES)
80
81
+ def check_oom(available_dram: int) -> None:
82
+ if available_dram <= 0:
83
+ raise MemoryError("Insufficient DRAM during block calculation.")
84
+
85
if kernel_size is None:
86
if n_model_params is None:
87
raise ValueError("`n_model_params` should be specified \
@@ -106,6 +110,8 @@ def align_2MB(x: int) -> int:
106
110
buffer = buffer_per_core * tensor_parallel_size
107
111
available_dram -= buffer
108
112
113
+ check_oom(available_dram)
114
109
115
b = kvcache_block_size * align(head_dim, 64) * math.ceil(
116
num_key_value_heads / tensor_parallel_size) * 2
117
c = num_layers * 2 * tensor_parallel_size
0 commit comments