Skip to content

Commit b916195

Browse files
authored
add scripts for 2p4d & 4p2d (#1242)
scripts for 2p4d & 4p2d
1 parent cbbe4f5 commit b916195

29 files changed

+392
-129
lines changed

pd_xpyd/1p1d_mooncake_d.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"local_hostname": "192.168.100.232",
2+
"local_hostname": "192.168.100.231",
33
"metadata_server": "etcd://10.239.129.81:2379",
4-
"protocol": "tcp",
5-
"device_name": "",
6-
"master_server_address": "192.168.100.222:50001"
4+
"protocol": "rdma",
5+
"device_name": "mlx5_0",
6+
"master_server_address": "192.168.100.221:50001"
77
}

pd_xpyd/1p1d_mooncake_p.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"local_hostname": "192.168.100.222",
2+
"local_hostname": "192.168.100.221",
33
"metadata_server": "etcd://10.239.129.81:2379",
4-
"protocol": "tcp",
5-
"device_name": "",
6-
"master_server_address": "192.168.100.222:50001"
4+
"protocol": "rdma",
5+
"device_name": "mlx5_0",
6+
"master_server_address": "192.168.100.221:50001"
77
}

pd_xpyd/1p1d_start_decode.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ export VLLM_HPU_LOG_STEP_GRAPH_COMPILATION=true
4444
export GRAPH_VISUALIZATION=1
4545
export PT_HPU_METRICS_GC_DETAILS=1
4646

47-
export VLLM_SKIP_WARMUP=False
47+
export VLLM_SKIP_WARMUP=True
4848
#export PT_HPU_RECIPE_CACHE_CONFIG=./_decode_cache,false,16384
4949

5050
python3 -m vllm.entrypoints.openai.api_server --model $model_path --port 8200 --max-model-len $model_len --gpu-memory-utilization $VLLM_GPU_MEMORY_UTILIZATION -tp 8 --max-num-seqs $max_num_seqs --trust-remote-code --kv-cache-dtype fp8_inc --disable-log-requests --max-num-batched-tokens $max_num_batched_tokens --use-padding-aware-scheduling --use-v2-block-manager --distributed_executor_backend ray --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_consumer"}'

pd_xpyd/1p1d_start_prefill.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ export VLLM_HPU_LOG_STEP_GRAPH_COMPILATION=true
4747
export GRAPH_VISUALIZATION=1
4848
export PT_HPU_METRICS_GC_DETAILS=1
4949

50-
export VLLM_SKIP_WARMUP=False
50+
export VLLM_SKIP_WARMUP=True
5151
#export PT_HPU_RECIPE_CACHE_CONFIG=./_prefill_cache,false,16384
5252

5353
python3 -m vllm.entrypoints.openai.api_server --model $model_path --port 8100 --max-model-len $model_len --gpu-memory-utilization $VLLM_GPU_MEMORY_UTILIZATION -tp 8 --max-num-seqs $max_num_seqs --trust-remote-code --disable-async-output-proc --kv-cache-dtype fp8_inc --disable-log-requests --max-num-batched-tokens $max_num_batched_tokens --use-padding-aware-scheduling --use-v2-block-manager --distributed_executor_backend ray --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_producer"}'
File renamed without changes.
File renamed without changes.

pd_xpyd/2p2d_start_decode.sh

Lines changed: 0 additions & 47 deletions
This file was deleted.

pd_xpyd/2p2d_start_prefill.sh

Lines changed: 0 additions & 50 deletions
This file was deleted.

pd_xpyd/2p4d_mooncake_d2.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"local_hostname": "192.168.100.241",
3+
"metadata_server": "etcd://10.239.129.9:2379",
4+
"protocol": "rdma",
5+
"device_name": "mlx5_0",
6+
"master_server_address": "192.168.100.191:50001"
7+
}

pd_xpyd/2p4d_mooncake_d3.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"local_hostname": "192.168.100.91",
3+
"metadata_server": "etcd://10.239.129.9:2379",
4+
"protocol": "rdma",
5+
"device_name": "mlx5_0",
6+
"master_server_address": "192.168.100.191:50001"
7+
}

pd_xpyd/4p2d_start_proxy.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/bash
# Start the disaggregated prefill/decode proxy demo for the 4p2d layout.
#
# Usage: 4p2d_start_proxy.sh [NUM_DECODE]
#   NUM_DECODE  number of consecutive decode ports (starting at DBASE_PORT)
#               registered for every address in DECODE_IPS. Default: 8.
#
# The proxy listens on port 8868 and is handed every prefill and decode
# endpoint as an "ip:port" argument.
set +x
#export MODEL_PATH=/software/data/models/DeepSeek-R1-BF16-w8afp8-static-no-ste-G2/
export MODEL_PATH=/mnt/disk2/hf_models/DeepSeek-R1-BF16-w8afp8-static-no-ste-G2/

if [ -z "$1" ]; then
    echo "please input the dp size per node, for example, 16dp on 2 node, run the xxx.sh 8"
    echo "run with default mode n=8"
    NUM_DECODE=8
else
    NUM_DECODE=$1
fi

# NUM_DECODE is used as an arithmetic loop bound below; reject anything that
# is not a plain positive integer instead of letting bash evaluate it.
case "$NUM_DECODE" in
    '' | *[!0-9]*)
        echo "NUM_DECODE must be a positive integer, got '$NUM_DECODE'" >&2
        exit 1
        ;;
esac
# Force base 10 so an argument such as "08" is not parsed as invalid octal.
NUM_DECODE=$((10#$NUM_DECODE))
if ((NUM_DECODE < 1)); then
    echo "NUM_DECODE must be at least 1" >&2
    exit 1
fi

# Decode endpoints: ports DBASE_PORT .. DBASE_PORT+NUM_DECODE-1 on every
# decode address, grouped by port.
DECODE_IPS=("10.239.129.81" "10.239.129.165")
DBASE_PORT=8200
DECODE_ARGS=()

for ((i = 0; i < NUM_DECODE; i++)); do
    PORT=$((DBASE_PORT + i))
    for IP in "${DECODE_IPS[@]}"; do
        DECODE_ARGS+=("${IP}:${PORT}")
    done
done

# Prefill endpoints.
# NOTE(review): the port does not depend on the loop index, so each
# PREFILL_IPS entry is registered twice on PBASE_PORT (4 arguments, 2 distinct
# endpoints). This mirrors the original behaviour; confirm whether the second
# pass was meant to use PBASE_PORT + i.
PREFILL_IPS=("10.239.129.9" "10.239.129.67")
PBASE_PORT=8100
PREFILL_ARGS=()

for ((i = 0; i < 2; i++)); do
    PORT=$((PBASE_PORT))
    for IP in "${PREFILL_IPS[@]}"; do
        PREFILL_ARGS+=("${IP}:${PORT}")
    done
done

# Arrays are expanded quoted so each endpoint reaches the proxy as exactly
# one argument.
python3 ./examples/online_serving/disagg_examples/disagg_proxy_demo.py \
    --model "$MODEL_PATH" \
    --prefill "${PREFILL_ARGS[@]}" \
    --decode "${DECODE_ARGS[@]}" \
    --port 8868

pd_xpyd/curl.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
curl http://127.0.0.1:8868/v1/completions \
1+
curl http://10.239.129.9:8868/v1/completions \
22
-H "Content-Type: application/json" \
33
-d '{
44
"model": "/mnt/disk2/hf_models/DeepSeek-R1-BF16-w8afp8-static-no-ste-G2/",

pd_xpyd/dp0_2p2d_start_decode.sh renamed to pd_xpyd/dp0_xp2d_start_decode.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/bin/bash
22
#set -x
33
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
4-
source ./pd_xpyd/dp_env.sh
5-
export MOONCAKE_CONFIG_PATH=./pd_xpyd/2p2d_mooncake_d.json
4+
source ./pd_xpyd/dp_d_env.sh
5+
export MOONCAKE_CONFIG_PATH=./pd_xpyd/2p2d_mooncake_d0.json
66

77
TOTAL_INSTANCES=8
88

pd_xpyd/dp0_xp4d_start_decode.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/bin/bash
# Launch the decode-side vLLM API servers for node 0 of the xp4d layout.
#
# Usage: dp0_xp4d_start_decode.sh [NUM_GROUPS]
#   NUM_GROUPS  number of server processes started on this node (default: 8).
#               Each process gets tensor parallelism TOTAL_INSTANCES/NUM_GROUPS.
#
# Process i listens on port 8200+i and runs with VLLM_DP_RANK=i.
# model_path, model_len, max_num_seqs, max_num_batched_tokens and
# VLLM_GPU_MEMORY_UTILIZATION are not set here; presumably they come from the
# sourced dp_d_env.sh - verify against that file.
set -x
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
source ./pd_xpyd/dp_d_env.sh
export MOONCAKE_CONFIG_PATH=./pd_xpyd/2p2d_mooncake_d0.json

# VLLM_DP_SIZE is exported further down once NUM_GROUPS is known; the earlier
# fixed "32" was always overwritten before any process was started.
export VLLM_EP_SIZE=32

TOTAL_INSTANCES=8

if [ -z "$1" ]; then
    echo "please input the dp size per node, for example, 16dp on 2 node, run the xxx.sh 8"
    echo "run with default mode n=8"
    NUM_GROUPS=8
else
    NUM_GROUPS=$1
fi

# NUM_GROUPS is a divisor and a loop bound: it must be an integer in
# 1..TOTAL_INSTANCES, otherwise the division fails or yields "-tp 0".
case "$NUM_GROUPS" in
    '' | *[!0-9]*)
        echo "NUM_GROUPS must be a positive integer, got '$NUM_GROUPS'" >&2
        exit 1
        ;;
esac
# Force base 10 so an argument such as "08" is not parsed as invalid octal.
NUM_GROUPS=$((10#$NUM_GROUPS))
if ((NUM_GROUPS < 1 || NUM_GROUPS > TOTAL_INSTANCES)); then
    echo "NUM_GROUPS must be between 1 and $TOTAL_INSTANCES, got $NUM_GROUPS" >&2
    exit 1
fi
if ((TOTAL_INSTANCES % NUM_GROUPS != 0)); then
    echo "warning: $TOTAL_INSTANCES is not divisible by $NUM_GROUPS; tensor parallel size is rounded down" >&2
fi

NUM_INSTANCES=$((TOTAL_INSTANCES / NUM_GROUPS))

# NOTE(review): the factor 4 presumably is the number of decode nodes in the
# xp4d layout - confirm.
dp_size=$((4 * NUM_GROUPS))
export VLLM_DP_SIZE=$dp_size

for ((i = 0; i < NUM_GROUPS; i++)); do

    # This node owns data-parallel ranks 0 .. NUM_GROUPS-1.
    RANK=$((0 + i))
    port=$((8200 + i))

    VLLM_DP_RANK=$RANK python3 -m vllm.entrypoints.openai.api_server \
        --model "$model_path" \
        --port "$port" \
        --max-model-len "$model_len" \
        --gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION" \
        -tp "$NUM_INSTANCES" \
        --max-num-seqs "$max_num_seqs" \
        --trust-remote-code \
        --kv-cache-dtype fp8_inc \
        --disable-log-requests \
        --max-num-batched-tokens "$max_num_batched_tokens" \
        --use-padding-aware-scheduling \
        --use-v2-block-manager \
        --distributed_executor_backend mp \
        --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_consumer"}' &
done

# Block until every background server has exited.
wait
49+

pd_xpyd/dp1_2p2d_start_decode.sh renamed to pd_xpyd/dp1_xp2d_start_decode.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
#set -x
33
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
4-
source ./pd_xpyd/dp_env.sh
4+
source ./pd_xpyd/dp_d_env.sh
55
export MOONCAKE_CONFIG_PATH=./pd_xpyd/2p2d_mooncake_d1.json
66

77
TOTAL_INSTANCES=8

pd_xpyd/dp1_xp4d_start_decode.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/bin/bash
# Launch the decode-side vLLM API servers for node 1 of the xp4d layout.
#
# Usage: dp1_xp4d_start_decode.sh [NUM_GROUPS]
#   NUM_GROUPS  number of server processes started on this node (default: 8).
#               Each process gets tensor parallelism TOTAL_INSTANCES/NUM_GROUPS.
#
# Process i listens on port 8200+i and runs with VLLM_DP_RANK=NUM_GROUPS+i.
# model_path, model_len, max_num_seqs, max_num_batched_tokens and
# VLLM_GPU_MEMORY_UTILIZATION are not set here; presumably they come from the
# sourced dp_d_env.sh - verify against that file.
set -x
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
source ./pd_xpyd/dp_d_env.sh
export MOONCAKE_CONFIG_PATH=./pd_xpyd/2p2d_mooncake_d1.json

# VLLM_DP_SIZE is exported further down once NUM_GROUPS is known; the earlier
# fixed "32" was always overwritten before any process was started.
export VLLM_EP_SIZE=32

TOTAL_INSTANCES=8

if [ -z "$1" ]; then
    echo "please input the dp size per node, for example, 16dp on 2 node, run the xxx.sh 8"
    echo "run with default mode n=8"
    NUM_GROUPS=8
else
    NUM_GROUPS=$1
fi

# NUM_GROUPS is a divisor and a loop bound: it must be an integer in
# 1..TOTAL_INSTANCES, otherwise the division fails or yields "-tp 0".
case "$NUM_GROUPS" in
    '' | *[!0-9]*)
        echo "NUM_GROUPS must be a positive integer, got '$NUM_GROUPS'" >&2
        exit 1
        ;;
esac
# Force base 10 so an argument such as "08" is not parsed as invalid octal.
NUM_GROUPS=$((10#$NUM_GROUPS))
if ((NUM_GROUPS < 1 || NUM_GROUPS > TOTAL_INSTANCES)); then
    echo "NUM_GROUPS must be between 1 and $TOTAL_INSTANCES, got $NUM_GROUPS" >&2
    exit 1
fi
if ((TOTAL_INSTANCES % NUM_GROUPS != 0)); then
    echo "warning: $TOTAL_INSTANCES is not divisible by $NUM_GROUPS; tensor parallel size is rounded down" >&2
fi

NUM_INSTANCES=$((TOTAL_INSTANCES / NUM_GROUPS))

# NOTE(review): the factor 4 presumably is the number of decode nodes in the
# xp4d layout - confirm.
dp_size=$((4 * NUM_GROUPS))
export VLLM_DP_SIZE=$dp_size

for ((i = 0; i < NUM_GROUPS; i++)); do

    # This node owns data-parallel ranks NUM_GROUPS .. 2*NUM_GROUPS-1.
    RANK=$((NUM_GROUPS + i))
    port=$((8200 + i))

    VLLM_DP_RANK=$RANK python3 -m vllm.entrypoints.openai.api_server \
        --model "$model_path" \
        --port "$port" \
        --max-model-len "$model_len" \
        --gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION" \
        -tp "$NUM_INSTANCES" \
        --max-num-seqs "$max_num_seqs" \
        --trust-remote-code \
        --kv-cache-dtype fp8_inc \
        --disable-log-requests \
        --max-num-batched-tokens "$max_num_batched_tokens" \
        --use-padding-aware-scheduling \
        --use-v2-block-manager \
        --distributed_executor_backend mp \
        --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_consumer"}' &
done

# Block until every background server has exited.
wait
49+

pd_xpyd/dp2_xp4d_start_decode.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/bin/bash
# Launch the decode-side vLLM API servers for node 2 of the xp4d layout.
#
# Usage: dp2_xp4d_start_decode.sh [NUM_GROUPS]
#   NUM_GROUPS  number of server processes started on this node (default: 8).
#               Each process gets tensor parallelism TOTAL_INSTANCES/NUM_GROUPS.
#
# Process i listens on port 8200+i and runs with VLLM_DP_RANK=2*NUM_GROUPS+i.
# model_path, model_len, max_num_seqs, max_num_batched_tokens and
# VLLM_GPU_MEMORY_UTILIZATION are not set here; presumably they come from the
# sourced dp_d_env.sh - verify against that file.
set -x
BASH_DIR=$(dirname "${BASH_SOURCE[0]}")
source ./pd_xpyd/dp_d_env.sh
export MOONCAKE_CONFIG_PATH=./pd_xpyd/2p4d_mooncake_d2.json

# VLLM_DP_SIZE is exported further down once NUM_GROUPS is known; the earlier
# fixed "32" was always overwritten before any process was started.
export VLLM_EP_SIZE=32

TOTAL_INSTANCES=8

if [ -z "$1" ]; then
    echo "please input the dp size per node, for example, 16dp on 2 node, run the xxx.sh 8"
    echo "run with default mode n=8"
    NUM_GROUPS=8
else
    NUM_GROUPS=$1
fi

# NUM_GROUPS is a divisor and a loop bound: it must be an integer in
# 1..TOTAL_INSTANCES, otherwise the division fails or yields "-tp 0".
case "$NUM_GROUPS" in
    '' | *[!0-9]*)
        echo "NUM_GROUPS must be a positive integer, got '$NUM_GROUPS'" >&2
        exit 1
        ;;
esac
# Force base 10 so an argument such as "08" is not parsed as invalid octal.
NUM_GROUPS=$((10#$NUM_GROUPS))
if ((NUM_GROUPS < 1 || NUM_GROUPS > TOTAL_INSTANCES)); then
    echo "NUM_GROUPS must be between 1 and $TOTAL_INSTANCES, got $NUM_GROUPS" >&2
    exit 1
fi
if ((TOTAL_INSTANCES % NUM_GROUPS != 0)); then
    echo "warning: $TOTAL_INSTANCES is not divisible by $NUM_GROUPS; tensor parallel size is rounded down" >&2
fi

NUM_INSTANCES=$((TOTAL_INSTANCES / NUM_GROUPS))

# NOTE(review): the factor 4 presumably is the number of decode nodes in the
# xp4d layout - confirm.
dp_size=$((4 * NUM_GROUPS))
export VLLM_DP_SIZE=$dp_size

for ((i = 0; i < NUM_GROUPS; i++)); do

    # This node owns data-parallel ranks 2*NUM_GROUPS .. 3*NUM_GROUPS-1.
    RANK=$((NUM_GROUPS * 2 + i))
    port=$((8200 + i))

    VLLM_DP_RANK=$RANK python3 -m vllm.entrypoints.openai.api_server \
        --model "$model_path" \
        --port "$port" \
        --max-model-len "$model_len" \
        --gpu-memory-utilization "$VLLM_GPU_MEMORY_UTILIZATION" \
        -tp "$NUM_INSTANCES" \
        --max-num-seqs "$max_num_seqs" \
        --trust-remote-code \
        --kv-cache-dtype fp8_inc \
        --disable-log-requests \
        --max-num-batched-tokens "$max_num_batched_tokens" \
        --use-padding-aware-scheduling \
        --use-v2-block-manager \
        --distributed_executor_backend mp \
        --kv-transfer-config '{"kv_connector":"MooncakeStoreConnector","kv_role":"kv_consumer"}' &
done

# Block until every background server has exited.
wait
49+

0 commit comments

Comments
 (0)