Skip to content

Commit f7d40c5

Browse files
committed
quick fix to benchmark_worker
1 parent 74eed9a commit f7d40c5

7 files changed

Lines changed: 21 additions & 8 deletions

File tree

benchmarks/benchmark_workers.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,16 @@ def create_cpu_gpu_worker(
8282
num_chunks=model_config.num_layers,
8383
))
8484
finished_ops_queue = mp.Queue()
85+
# Create a shared memory buffer for transfer operations
86+
# Buffer shape is (max_op_num, max_block_num): max_op_num=4, and max_block_num should be larger than num_blocks_to_transfer
87+
max_block_num = max(1024, cache_config.num_cpu_blocks)
88+
op_buffer_tensor = torch.empty((4, max_block_num), dtype=torch.int64).share_memory_()
89+
8590
if model_config.tp_size == 1:
8691
worker_handle = GPUCPUTransferWorker.create_worker(
8792
mp_ctx=mp.get_context('spawn'),
8893
finished_ops_queue=finished_ops_queue,
94+
op_buffer_tensor=op_buffer_tensor,
8995
gpu_blocks=gpu_handles[0].get_tensor_handle_list(),
9096
cpu_blocks=cpu_handle.get_tensor(),
9197
gpu_kv_layout=gpu_handles[0].kv_layout,
@@ -101,6 +107,7 @@ def create_cpu_gpu_worker(
101107
worker_handle = tpGPUCPUTransferWorker.create_worker(
102108
mp_ctx=mp.get_context('spawn'),
103109
finished_ops_queue=finished_ops_queue,
110+
op_buffer_tensor=op_buffer_tensor,
104111
gpu_blocks=[handle.get_tensor_handle_list() for handle in gpu_handles],
105112
cpu_blocks=cpu_handle.get_tensor(),
106113
gpu_kv_layout=gpu_handles[0].kv_layout,
@@ -150,9 +157,15 @@ def create_cpu_ssd_worker(
150157
cache_dir=cache_config.ssd_cache_dir,
151158
)
152159
finished_ops_queue = mp.Queue()
160+
# Create a shared memory buffer for transfer operations
161+
# Buffer shape is (max_op_num, max_block_num): max_op_num=4, and max_block_num should be larger than num_blocks_to_transfer
162+
max_block_num = max(1024, cache_config.num_cpu_blocks)
163+
op_buffer_tensor = torch.empty((4, max_block_num), dtype=torch.int64).share_memory_()
164+
153165
worker_handle = CPUSSDDiskTransferWorker.create_worker(
154166
mp_ctx=mp.get_context('spawn'),
155167
finished_ops_queue=finished_ops_queue,
168+
op_buffer_tensor=op_buffer_tensor,
156169
cpu_blocks=cpu_handle.get_tensor(),
157170
ssd_files=ssd_handle.get_file_list(),
158171
cpu_kv_layout=cpu_handle.kv_layout,

benchmarks/example_config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"enable_ssd": true,
1414
"enable_remote": false,
1515
"tokens_per_block": 16,
16-
"use_gds": false,
16+
"enable_gds": false,
1717
"gpu_kv_layout_type": "LAYERWISE",
1818
"cpu_kv_layout_type": "BLOCKWISE",
1919
"ssd_kv_layout_type": "BLOCKWISE",

docs/dynamo_integration/README_en.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ for i in $(seq 0 $((NUM_WORKERS-1))); do
9393
"enable_cpu": true,
9494
"enable_ssd": false,
9595
"enable_remote": false,
96-
"use_gds": false,
96+
"enable_gds": false,
9797
"enable_trace": false,
9898
"ssd_cache_iouring_entries": 512,
9999
"tokens_per_block": 64,

docs/dynamo_integration/README_zh.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ for i in $(seq 0 $((NUM_WORKERS-1))); do
9494
"enable_cpu": true,
9595
"enable_ssd": false,
9696
"enable_remote": false,
97-
"use_gds": false,
97+
"enable_gds": false,
9898
"enable_trace": false,
9999
"ssd_cache_iouring_entries": 512,
100100
"tokens_per_block": 64,

docs/flexkv_config_reference/README_en.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Below is a production-grade recommended configuration that balances performance
1616
"enable_cpu": true,
1717
"enable_ssd": true,
1818
"enable_remote": false,
19-
"use_gds": false,
19+
"enable_gds": false,
2020
"enable_trace": false,
2121
"ssd_cache_iouring_entries": 512,
2222
"tokens_per_block": 64,
@@ -55,7 +55,7 @@ The FlexKV configuration file is a JSON file, primarily consisting of three part
5555
| `enable_cpu` | bool | true | Whether to enable CPU memory as a cache layer. Strongly recommended to enable. |
5656
| `enable_ssd` | bool | false | Whether to enable SSD as a cache layer. Recommended if NVMe SSD is available. |
5757
| `enable_remote` | bool | false | Whether to enable the remote cache (e.g., scalable cloud storage). Requires a remote cache engine and a custom implementation. |
58-
| `use_gds` | bool | false | Whether to use GPU Direct Storage (GDS) to accelerate SSD I/O. Not currently supported. |
58+
| `enable_gds` | bool | false | Whether to use GPU Direct Storage (GDS) to accelerate SSD I/O. Not currently supported. |
5959
| `index_accel` | bool | false | Whether to enable C++ RadixTree. Recommended to enable. |
6060

6161
---

docs/flexkv_config_reference/README_zh.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"enable_cpu": true,
1717
"enable_ssd": true,
1818
"enable_remote": false,
19-
"use_gds": false,
19+
"enable_gds": false,
2020
"enable_trace": false,
2121
"ssd_cache_iouring_entries": 512,
2222
"tokens_per_block": 64,
@@ -55,7 +55,7 @@ FlexKV 的配置文件是一个 JSON 文件,主要包含三个部分:
5555
| `enable_cpu` | bool | true | 是否启用 CPU 内存作为缓存层。强烈建议开启。 |
5656
| `enable_ssd` | bool | false | 是否启用 SSD 作为缓存层。如配备 NVMe SSD,建议开启。 |
5757
| `enable_remote` | bool | false | 是否启用远程缓存(如可扩展云存储等)。需要配合远程缓存和自定义的远程缓存引擎使用。 |
58-
| `use_gds` | bool | false | 是否使用 GPU Direct Storage(GDS)加速 SSD 读写。目前暂不支持。 |
58+
| `enable_gds` | bool | false | 是否使用 GPU Direct Storage(GDS)加速 SSD 读写。目前暂不支持。 |
5959
| `index_accel` | bool | false | 是否启用C++ RadixTree。推荐开启。 |
6060

6161
---

examples/run_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def parse_args() -> argparse.Namespace:
8181
enable_cpu=args.enable_cpu,
8282
enable_ssd=args.enable_ssd,
8383
enable_remote=args.enable_remote,
84-
use_gds=False,
84+
enable_gds=False,
8585
enable_trace=False,
8686
ssd_cache_iouring_entries=512,
8787
tokens_per_block=args.block_size,

0 commit comments

Comments
 (0)