Skip to content

Commit bf87bc6

Browse files
committed
gaie config for wide-ep
Signed-off-by: Michael Kalantar <kalantar@us.ibm.com>
1 parent 1309792 commit bf87bc6

2 files changed

Lines changed: 35 additions & 8 deletions (across both files)

File tree

scenarios/guides/wide-ep-lws.sh

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=kgateway
2+
13
# WIDE EP/DP WITH LWS WELL LIT PATH
24
# Based on https://github.com/llm-d/llm-d/tree/main/guides/wide-ep-lws/README.md
35
# Removed pod monitoring; can be added using LLMDBENCH_VLLM_MODELSERVICE_EXTRA_POD_CONFIG
@@ -20,7 +22,33 @@ export LLMDBENCH_DEPLOY_MODEL_LIST="deepseek-ai/DeepSeek-R1-0528"
2022
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=1Ti
2123

2224
# Routing configuration (via gaie)
23-
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE=custom-plugins.yaml
25+
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE="custom-plugins.yaml"
26+
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_CUSTOM_PLUGINS=$(mktemp)
27+
cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_GAIE_CUSTOM_PLUGINS
28+
custom-plugins.yaml: |
29+
apiVersion: inference.networking.x-k8s.io/v1alpha1
30+
kind: EndpointPickerConfig
31+
plugins:
32+
- type: prefill-header-handler
33+
- type: prefill-filter
34+
- type: decode-filter
35+
- type: random-picker
36+
parameters:
37+
maxNumOfEndpoints: 1
38+
- type: pd-profile-handler
39+
parameters:
40+
threshold: 0
41+
hashBlockSize: 5
42+
schedulingProfiles:
43+
- name: prefill
44+
plugins:
45+
- pluginRef: prefill-filter
46+
- pluginRef: random-picker
47+
- name: decode
48+
plugins:
49+
- pluginRef: decode-filter
50+
- pluginRef: random-picker
51+
EOF
2452

2553
# Routing configuration (via modelservice)
2654
# export LLMDBENCH_LLMD_ROUTINGSIDECAR_CONNECTOR=nixlv2 # already the default
@@ -124,10 +152,10 @@ export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_NETWORK_NR=1
124152
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EPHEMERAL_STORAGE_NR=1Ti
125153
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_INFERENCE_PORT=8000
126154
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_MODEL_COMMAND=custom
127-
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_PREPROCESS="python3 /setup/preprocess/set_llmdbench_environment.py; source \$HOME/llmdbench_env.sh"
155+
# export LLMDBENCH_VLLM_MODELSERVICE_DECODE_PREPROCESS="python3 /setup/preprocess/set_llmdbench_environment.py; source \$HOME/llmdbench_env.sh"
128156
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_ARGS=$(mktemp)
129157
cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_ARGS
130-
exec vllm serve \
158+
find /dev/shm -type f -delete; START_RANK=\$(( \${LWS_WORKER_INDEX:-0} * DP_SIZE_LOCAL )); exec vllm serve \
131159
REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL \
132160
--port 8000 \
133161
--trust-remote-code \
@@ -234,13 +262,12 @@ export LLMDBENCH_VLLM_MODELSERVICE_DECODE_NETWORK_NR=1
234262
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EPHEMERAL_STORAGE_NR=1Ti
235263
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_INFERENCE_PORT=8200
236264
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_MODEL_COMMAND=custom
237-
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_PREPROCESS="python3 /setup/preprocess/set_llmdbench_environment.py; source \$HOME/llmdbench_env.sh"
265+
# export LLMDBENCH_VLLM_MODELSERVICE_DECODE_PREPROCESS="python3 /setup/preprocess/set_llmdbench_environment.py; source \$HOME/llmdbench_env.sh"
238266
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS=$(mktemp)
239-
cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS
240267
# Clear /dev/shm on start to prevent running out of space when crashes occur
241268
# https://github.com/llm-d/llm-d/issues/352
242-
find /dev/shm -type f -delete; \
243-
exec vllm serve \
269+
cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS
270+
find /dev/shm -type f -delete; START_RANK=\$(( \${LWS_WORKER_INDEX:-0} * DP_SIZE_LOCAL )); exec vllm serve \
244271
REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL \
245272
--port 8200 \
246273
--trust-remote-code \

setup/steps/09_deploy_via_modelservice.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def generate_ms_values_yaml(
291291
readinessProbe:
292292
httpGet:
293293
path: /health
294-
port: 8200
294+
port: {decode_inference_port}
295295
failureThreshold: 3
296296
periodSeconds: 5
297297
{add_config(decode_extra_container_config, 6).lstrip()}

0 commit comments

Comments (0)