Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions benchmarks/benchmark_workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,16 @@ def create_cpu_gpu_worker(
num_chunks=model_config.num_layers,
))
finished_ops_queue = mp.Queue()
# Create a shared-memory buffer for transfer operations, shaped (max_op_num, max_block_num).
# max_op_num = 4; max_block_num should be larger than num_blocks_to_transfer.
max_block_num = max(1024, cache_config.num_cpu_blocks)
op_buffer_tensor = torch.empty((4, max_block_num), dtype=torch.int64).share_memory_()

if model_config.tp_size == 1:
worker_handle = GPUCPUTransferWorker.create_worker(
mp_ctx=mp.get_context('spawn'),
finished_ops_queue=finished_ops_queue,
op_buffer_tensor=op_buffer_tensor,
gpu_blocks=gpu_handles[0].get_tensor_handle_list(),
cpu_blocks=cpu_handle.get_tensor(),
gpu_kv_layout=gpu_handles[0].kv_layout,
Expand All @@ -101,6 +107,7 @@ def create_cpu_gpu_worker(
worker_handle = tpGPUCPUTransferWorker.create_worker(
mp_ctx=mp.get_context('spawn'),
finished_ops_queue=finished_ops_queue,
op_buffer_tensor=op_buffer_tensor,
gpu_blocks=[handle.get_tensor_handle_list() for handle in gpu_handles],
cpu_blocks=cpu_handle.get_tensor(),
gpu_kv_layout=gpu_handles[0].kv_layout,
Expand Down Expand Up @@ -150,9 +157,15 @@ def create_cpu_ssd_worker(
cache_dir=cache_config.ssd_cache_dir,
)
finished_ops_queue = mp.Queue()
# Create a shared-memory buffer for transfer operations, shaped (max_op_num, max_block_num).
# max_op_num = 4; max_block_num should be larger than num_blocks_to_transfer.
max_block_num = max(1024, cache_config.num_cpu_blocks)
op_buffer_tensor = torch.empty((4, max_block_num), dtype=torch.int64).share_memory_()

worker_handle = CPUSSDDiskTransferWorker.create_worker(
mp_ctx=mp.get_context('spawn'),
finished_ops_queue=finished_ops_queue,
op_buffer_tensor=op_buffer_tensor,
cpu_blocks=cpu_handle.get_tensor(),
ssd_files=ssd_handle.get_file_list(),
cpu_kv_layout=cpu_handle.kv_layout,
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/example_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
"enable_ssd": true,
"enable_remote": false,
"tokens_per_block": 16,
"use_gds": false,
"enable_gds": false,
"gpu_kv_layout_type": "LAYERWISE",
"cpu_kv_layout_type": "BLOCKWISE",
"ssd_kv_layout_type": "BLOCKWISE",
Expand Down
2 changes: 1 addition & 1 deletion docs/dynamo_integration/README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ for i in $(seq 0 $((NUM_WORKERS-1))); do
"enable_cpu": true,
"enable_ssd": false,
"enable_remote": false,
"use_gds": false,
"enable_gds": false,
"enable_trace": false,
"ssd_cache_iouring_entries": 512,
"tokens_per_block": 64,
Expand Down
2 changes: 1 addition & 1 deletion docs/dynamo_integration/README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ for i in $(seq 0 $((NUM_WORKERS-1))); do
"enable_cpu": true,
"enable_ssd": false,
"enable_remote": false,
"use_gds": false,
"enable_gds": false,
"enable_trace": false,
"ssd_cache_iouring_entries": 512,
"tokens_per_block": 64,
Expand Down
4 changes: 2 additions & 2 deletions docs/flexkv_config_reference/README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Below is a production-grade recommended configuration that balances performance
"enable_cpu": true,
"enable_ssd": true,
"enable_remote": false,
"use_gds": false,
"enable_gds": false,
"enable_trace": false,
"ssd_cache_iouring_entries": 512,
"tokens_per_block": 64,
Expand Down Expand Up @@ -55,7 +55,7 @@ The FlexKV configuration file is a JSON file, primarily consisting of three part
| `enable_cpu` | bool | true | Whether to enable CPU memory as a cache layer. Strongly recommended to enable. |
| `enable_ssd` | bool | false | Whether to enable SSD as a cache layer. Recommended if NVMe SSD is available. |
| `enable_remote` | bool | false | Whether to enable remote cache (e.g., scalable cloud storage). Requires a remote cache engine and a custom implementation. |
| `use_gds` | bool | false | Whether to use GPU Direct Storage (GDS) to accelerate SSD I/O. Not currently supported. |
| `enable_gds` | bool | false | Whether to use GPU Direct Storage (GDS) to accelerate SSD I/O. Not currently supported. |
| `index_accel` | bool | false | Whether to enable C++ RadixTree. Recommended to enable. |

---
Expand Down
4 changes: 2 additions & 2 deletions docs/flexkv_config_reference/README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"enable_cpu": true,
"enable_ssd": true,
"enable_remote": false,
"use_gds": false,
"enable_gds": false,
"enable_trace": false,
"ssd_cache_iouring_entries": 512,
"tokens_per_block": 64,
Expand Down Expand Up @@ -55,7 +55,7 @@ FlexKV 的配置文件是一个 JSON 文件,主要包含三个部分:
| `enable_cpu` | bool | true | 是否启用 CPU 内存作为缓存层。强烈建议开启。 |
| `enable_ssd` | bool | false | 是否启用 SSD 作为缓存层。如配备 NVMe SSD,建议开启。 |
| `enable_remote` | bool | false | 是否启用远程缓存(如可扩展云存储等)。需要配合远程缓存和自定义的远程缓存引擎使用。 |
| `use_gds` | bool | false | 是否使用 GPU Direct Storage(GDS)加速 SSD 读写。目前暂不支持。 |
| `enable_gds` | bool | false | 是否使用 GPU Direct Storage(GDS)加速 SSD 读写。目前暂不支持。 |
| `index_accel` | bool | false | 是否启用 C++ RadixTree。推荐开启。 |

---
Expand Down
2 changes: 1 addition & 1 deletion examples/run_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def parse_args() -> argparse.Namespace:
enable_cpu=args.enable_cpu,
enable_ssd=args.enable_ssd,
enable_remote=args.enable_remote,
use_gds=False,
enable_gds=False,
enable_trace=False,
ssd_cache_iouring_entries=512,
tokens_per_block=args.block_size,
Expand Down