Skip to content

Commit d353d89

Browse files
committed
fix component_nr
Signed-off-by: Michael Kalantar <kalantar@us.ibm.com>
1 parent bf87bc6 commit d353d89

5 files changed

Lines changed: 67 additions & 22 deletions

File tree

scenarios/guides/wide-ep-lws.sh

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=kgateway
2-
31
# WIDE EP/DP WITH LWS WELL LIT PATH
42
# Based on https://github.com/llm-d/llm-d/tree/main/guides/wide-ep-lws/README.md
53
# Removed pod monitoring; can be added using LLMDBENCH_VLLM_MODELSERVICE_EXTRA_POD_CONFIG
@@ -21,6 +19,14 @@ export LLMDBENCH_DEPLOY_MODEL_LIST="deepseek-ai/DeepSeek-R1-0528"
2119
#export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=ocs-storagecluster-cephfs
2220
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=1Ti
2321

22+
# gateway configuration
23+
###### default is istio and NodePort
24+
# export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=kgateway
25+
###### on OpenShift, as an alternative to the (default) NodePort
26+
# export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE=ClusterIP
27+
###### if LoadBalancer is supported
28+
# export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE=LoadBalancer
29+
2430
# Routing configuration (via gaie)
2531
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE="custom-plugins.yaml"
2632
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_CUSTOM_PLUGINS=$(mktemp)
@@ -49,6 +55,25 @@ custom-plugins.yaml: |
4955
- pluginRef: decode-filter
5056
- pluginRef: random-picker
5157
EOF
58+
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_POOL_PROVIDER_CONFIG=$(mktemp)
59+
cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_POOL_PROVIDER_CONFIG
60+
destinationRule:
61+
host: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL_ID_LABEL-gaie-epp
62+
trafficPolicy:
63+
tls:
64+
mode: SIMPLE
65+
insecureSkipVerify: true
66+
connectionPool:
67+
http:
68+
http1MaxPendingRequests: 256000
69+
maxRequestsPerConnection: 256000
70+
http2MaxRequests: 256000
71+
idleTimeout: "900s"
72+
tcp:
73+
maxConnections: 256000
74+
maxConnectionDuration: "1800s"
75+
connectTimeout: "900s"
76+
EOF
5277

5378
# Routing configuration (via modelservice)
5479
# export LLMDBENCH_LLMD_ROUTINGSIDECAR_CONNECTOR=nixlv2 # already the default

setup/env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_SERVICE_TYPE=${LLMDBENCH_VLLM_MODELSE
192192
export LLMDBENCH_VLLM_MODELSERVICE_ROUTE=${LLMDBENCH_VLLM_MODELSERVICE_ROUTE:-false}
193193
# Endpoint Picker Parameters
194194
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE=${LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE:-"default-plugins.yaml"}
195-
195+
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_POOL_PROVIDER_CONFIG=${LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_POOL_PROVIDER_CONFIG:-""}
196196
export LLMDBENCH_LLMD_ROUTINGSIDECAR_CONNECTOR=${LLMDBENCH_LLMD_ROUTINGSIDECAR_CONNECTOR:-"nixlv2"}
197197
export LLMDBENCH_LLMD_ROUTINGSIDECAR_DEBUG_LEVEL=${LLMDBENCH_LLMD_ROUTINGSIDECAR_DEBUG_LEVEL:-3}
198198

setup/steps/07_deploy_setup.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,25 @@ def gateway_values(provider : str, host: str, service: str) -> str:
1717
if provider == "istio":
1818
return f"""gateway:
1919
gatewayClassName: istio
20-
service:
21-
type: {service}
22-
destinationRule:
20+
gatewayParameters:
2321
enabled: true
24-
trafficPolicy:
25-
tls:
26-
mode: SIMPLE
27-
insecureSkipVerify: true
28-
host: {host}"""
22+
service:
23+
type: {service}"""
2924

3025
elif provider == "kgateway":
3126
return f"""gateway:
3227
gatewayClassName: kgateway
28+
"""
29+
30+
elif provider == "kgateway-openshift":
31+
return f"""gateway:
32+
gatewayClassName: kgateway
3333
service:
3434
type: {service}
35-
# destinationRule:
36-
# host: {host}
3735
gatewayParameters:
3836
enabled: true
3937
"""
38+
4039
elif provider == "gke":
4140
return f"""gateway:
4241
gatewayClassName: gke-l7-regional-external-managed
@@ -165,7 +164,10 @@ def main():
165164
# Create infra values file
166165
infra_value_file = Path(helm_base_dir / "infra.yaml" )
167166
with open(infra_value_file, 'w') as f:
168-
f.write(gateway_values(ev['vllm_modelservice_gateway_class_name'], f"{model_id_label}-gaie-epp.{ev['vllm_common_namespace']}{ev['vllm_common_fqdn']}", ev["vllm_modelservice_gateway_service_type"]))
167+
gw_class = ev['vllm_modelservice_gateway_class_name']
168+
if gw_class == 'kgateway' and ev['control_deploy_is_openshift']:
169+
gw_class = f"{gw_class}-openshift"
170+
f.write(gateway_values(gw_class, f"{model_id_label}-gaie-epp.{ev['vllm_common_namespace']}{ev['vllm_common_fqdn']}", ev["vllm_modelservice_gateway_service_type"]))
169171

170172
os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL_ID_LABEL"] = model_id_label
171173

@@ -250,7 +252,7 @@ def main():
250252
exit(result)
251253
announce(f"✅ chart \"infra-{ev['vllm_modelservice_release']}\" deployed successfully")
252254

253-
announce("✅ Completed gaie deployment")
255+
announce("✅ Completed gateway deployment")
254256
else:
255257
deploy_methods = ev["deploy_methods"]
256258
announce(f"⏭️ Environment types are \"{deploy_methods}\". Skipping this step.")

setup/steps/08_deploy_gaie.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,9 @@ def main():
125125
secretKeyRef:
126126
name: {ev["vllm_common_hf_token_name"]}
127127
key: {ev["vllm_common_hf_token_key"]}"""
128+
129+
gaie_provider = provider(ev['vllm_modelservice_gateway_class_name'])
130+
ip_provider_config = ev.get("vllm_modelservice_inference_pool_provider_config", "")
128131

129132
# Generate GAIE values YAML content
130133
gaie_values_content = f"""inferenceExtension:
@@ -155,8 +158,17 @@ def main():
155158
matchLabels:
156159
llm-d.ai/inferenceServing: "true"
157160
llm-d.ai/model: {model_id_label}
158-
provider:
159-
name: {provider(ev['vllm_modelservice_gateway_class_name'])}
161+
"""
162+
if gaie_provider != "none" or ip_provider_config != "":
163+
gaie_values_content = f"""{gaie_values_content}
164+
provider:"""
165+
if gaie_provider != "none":
166+
gaie_values_content = f"""{gaie_values_content}
167+
name: {gaie_provider}
168+
"""
169+
if ip_provider_config != "":
170+
gaie_values_content = f"""{gaie_values_content}
171+
{add_config(ip_provider_config, 4, f"{ev['vllm_modelservice_gateway_class_name']}:").lstrip()}
160172
"""
161173
# Write GAIE values file
162174
gaie_values_file = helm_dir / "gaie-values.yaml"
@@ -218,7 +230,7 @@ def main():
218230

219231
model_number += 1
220232

221-
announce("✅ Completed model deployment")
233+
announce("✅ Completed GAIE deployment")
222234
else:
223235
announce(f"⏭️ Environment types are \"{ev['deploy_methods']}\". Skipping this step.")
224236

setup/steps/09_deploy_via_modelservice.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def generate_ms_values_yaml(
291291
readinessProbe:
292292
httpGet:
293293
path: /health
294-
port: {decode_inference_port}
294+
port: {common_inference_port}
295295
failureThreshold: 3
296296
periodSeconds: 5
297297
{add_config(decode_extra_container_config, 6).lstrip()}
@@ -351,7 +351,7 @@ def generate_ms_values_yaml(
351351
readinessProbe:
352352
httpGet:
353353
path: /health
354-
port: {prefill_inference_port}
354+
port: {common_inference_port}
355355
failureThreshold: 3
356356
periodSeconds: 5
357357
{add_config(prefill_extra_container_config, 6).lstrip()}
@@ -561,15 +561,21 @@ def main():
561561

562562
# Wait for decode pods to be created, running, and ready
563563
api_client = client.CoreV1Api()
564+
expected_num_decode_pods = ev["vllm_modelservice_decode_replicas"]
565+
if ev.get("vllm_modelservice_multinode", "false"):
566+
expected_num_decode_pods = int(ev.get("vllm_modelservice_decode_num_workers_parallelism", "1")) * int(expected_num_decode_pods)
564567
result = wait_for_pods_created_running_ready(
565-
api_client, ev, ev["vllm_modelservice_decode_replicas"], "decode"
568+
api_client, ev, expected_num_decode_pods, "decode"
566569
)
567570
if result != 0:
568571
return result
569572

570573
# Wait for prefill pods to be created, running, and ready
574+
expected_num_prefill_pods = ev["vllm_modelservice_prefill_replicas"]
575+
if ev.get("vllm_modelservice_multinode", "false"):
576+
expected_num_prefill_pods = int(ev.get("vllm_modelservice_prefill_num_workers_parallelism", "1")) * int(expected_num_prefill_pods)
571577
result = wait_for_pods_created_running_ready(
572-
api_client, ev, ev["vllm_modelservice_prefill_replicas"], "prefill"
578+
api_client, ev, expected_num_prefill_pods, "prefill"
573579
)
574580
if result != 0:
575581
return result

0 commit comments

Comments
 (0)