@@ -82,10 +82,16 @@ def create_cpu_gpu_worker(
8282 num_chunks = model_config .num_layers ,
8383 ))
8484 finished_ops_queue = mp .Queue ()
85+ # Create a shared-memory buffer used to pass transfer operations to the worker.
86+ # Buffer shape is (max_op_num=4, max_block_num); max_block_num should be larger than num_blocks_to_transfer.
87+ max_block_num = max (1024 , cache_config .num_cpu_blocks )
88+ op_buffer_tensor = torch .empty ((4 , max_block_num ), dtype = torch .int64 ).share_memory_ ()
89+
8590 if model_config .tp_size == 1 :
8691 worker_handle = GPUCPUTransferWorker .create_worker (
8792 mp_ctx = mp .get_context ('spawn' ),
8893 finished_ops_queue = finished_ops_queue ,
94+ op_buffer_tensor = op_buffer_tensor ,
8995 gpu_blocks = gpu_handles [0 ].get_tensor_handle_list (),
9096 cpu_blocks = cpu_handle .get_tensor (),
9197 gpu_kv_layout = gpu_handles [0 ].kv_layout ,
@@ -101,6 +107,7 @@ def create_cpu_gpu_worker(
101107 worker_handle = tpGPUCPUTransferWorker .create_worker (
102108 mp_ctx = mp .get_context ('spawn' ),
103109 finished_ops_queue = finished_ops_queue ,
110+ op_buffer_tensor = op_buffer_tensor ,
104111 gpu_blocks = [handle .get_tensor_handle_list () for handle in gpu_handles ],
105112 cpu_blocks = cpu_handle .get_tensor (),
106113 gpu_kv_layout = gpu_handles [0 ].kv_layout ,
@@ -150,9 +157,15 @@ def create_cpu_ssd_worker(
150157 cache_dir = cache_config .ssd_cache_dir ,
151158 )
152159 finished_ops_queue = mp .Queue ()
160+ # Create a shared-memory buffer used to pass transfer operations to the worker.
161+ # Buffer shape is (max_op_num=4, max_block_num); max_block_num should be larger than num_blocks_to_transfer.
162+ max_block_num = max (1024 , cache_config .num_cpu_blocks )
163+ op_buffer_tensor = torch .empty ((4 , max_block_num ), dtype = torch .int64 ).share_memory_ ()
164+
153165 worker_handle = CPUSSDDiskTransferWorker .create_worker (
154166 mp_ctx = mp .get_context ('spawn' ),
155167 finished_ops_queue = finished_ops_queue ,
168+ op_buffer_tensor = op_buffer_tensor ,
156169 cpu_blocks = cpu_handle .get_tensor (),
157170 ssd_files = ssd_handle .get_file_list (),
158171 cpu_kv_layout = cpu_handle .kv_layout ,
0 commit comments