We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 54ade48 commit 9da978d (Copy full SHA for 9da978d)
benchmarks/single_node/kimik2.5_fp4_b200.sh
@@ -5,7 +5,6 @@ source "$(dirname "$0")/../benchmark_lib.sh"
5
check_env_vars \
6
MODEL \
7
TP \
8
- EP_SIZE \
9
CONC \
10
ISL \
11
OSL \
@@ -29,8 +28,7 @@ PORT=${PORT:-8888}
29
28
30
set -x
31
vllm serve $MODEL --host 0.0.0.0 --port $PORT \
32
---tensor-parallel-size $TP \
33
---expert-parallel-size $EP_SIZE \
+--tensor-parallel-size=$TP \
34
--gpu-memory-utilization 0.90 \
35
--max-model-len $MAX_MODEL_LEN \
36
--max-num-seqs $CONC \
0 commit comments