Skip to content

Commit 5a161cd

Browse files
committed
use newer modelservice
Signed-off-by: Michael Kalantar <kalantar@us.ibm.com>
1 parent 061a0c3 commit 5a161cd

3 files changed

Lines changed: 114 additions & 64 deletions

File tree

setup/env.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,13 @@ export LLMDBENCH_GATEWAY_API_INFERENCE_EXTENSION_CRD_REVISION=${LLMDBENCH_GATEWA
172172
export LLMDBENCH_VLLM_MODELSERVICE_RELEASE=${LLMDBENCH_VLLM_MODELSERVICE_RELEASE:-"llmdbench"}
173173
export LLMDBENCH_VLLM_MODELSERVICE_VALUES_FILE=${LLMDBENCH_VLLM_MODELSERVICE_VALUES_FILE:-"default-values.yaml"}
174174
export LLMDBENCH_VLLM_MODELSERVICE_ADDITIONAL_SETS=${LLMDBENCH_VLLM_MODELSERVICE_ADDITIONAL_SETS:-""}
175-
export LLMDBENCH_VLLM_MODELSERVICE_CHART_VERSION=${LLMDBENCH_VLLM_MODELSERVICE_CHART_VERSION:-v0.2.16}
175+
export LLMDBENCH_VLLM_MODELSERVICE_CHART_VERSION=${LLMDBENCH_VLLM_MODELSERVICE_CHART_VERSION:-auto}
176176
export LLMDBENCH_VLLM_MODELSERVICE_CHART_NAME=${LLMDBENCH_VLLM_MODELSERVICE_CHART_NAME:-"llm-d-modelservice"}
177177
export LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY=${LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY:-"llm-d-modelservice"}
178178
export LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY_URL=${LLMDBENCH_VLLM_MODELSERVICE_HELM_REPOSITORY_URL:-"https://llm-d-incubation.github.io/llm-d-modelservice/"}
179179
export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL=${LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL:-"pvc"}
180180
export LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME=${LLMDBENCH_VLLM_MODELSERVICE_GATEWAY_CLASS_NAME:-kgateway}
181181
export LLMDBENCH_VLLM_MODELSERVICE_ROUTE=${LLMDBENCH_VLLM_MODELSERVICE_ROUTE:-true}
182-
export LLMDBENCH_VLLM_MODELSERVICE_EPP=${LLMDBENCH_VLLM_MODELSERVICE_EPP:-false}
183-
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_POOL=${LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_POOL:-false}
184182
# Endpoint Picker Parameters
185183
export LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE=${LLMDBENCH_VLLM_MODELSERVICE_GAIE_PLUGINS_CONFIGFILE:-"default-plugins.yaml"}
186184

setup/functions.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,37 @@ async def wait_for_job(job_name, namespace, timeout=7200, dry_run: bool = False)
722722
finally:
723723
await api_client.close()
724724

725+
def create_httproute(
    api: pykube.HTTPClient, obj_spec: str, dry_run: bool = False, verbose: bool = False
):
    """Create an HTTPRoute in the cluster from a YAML manifest string.

    An already-existing route is left untouched (no update is attempted).

    Args:
        api: Authenticated pykube HTTP client connected to the target cluster.
        obj_spec: HTTPRoute manifest as a YAML string.
        dry_run: When True, only announce what would be created.
        verbose: When True, announce the constructed object before acting.
    """
    obj_type_label = "HTTPRoute"

    # Guard BEFORE parsing/key access: an empty spec would otherwise crash
    # in yaml.safe_load (returns None) or on the ["metadata"]["name"] lookup.
    if not obj_spec or not obj_spec.strip():
        announce(f"Error: {obj_type_label} spec cannot be empty.")
        return

    parsed_spec = yaml.safe_load(clear_string(obj_spec))
    if not parsed_spec:
        announce(f"Error: {obj_type_label} spec cannot be empty.")
        return

    obj_name = parsed_spec["metadata"]["name"]

    # HTTPRoute is a CRD (Gateway API), so the class must be built dynamically.
    HTTPRoute = pykube.object_factory(api, "gateway.networking.k8s.io/v1", "HTTPRoute")
    route = HTTPRoute(api, parsed_spec)

    if verbose:
        announce(f"🚀 create_httproute called {route}")

    try:
        if route.exists():
            # Existing routes are intentionally not updated; updates were
            # considered (route.update()) but are currently skipped.
            pass
        elif dry_run:
            announce(f"[DRY RUN] Would have created {obj_type_label} '{obj_name}'.")
        else:
            route.create()
            announce(f"✅ {obj_type_label} '{obj_name}' created successfully.")
    except PyKubeError as e:
        announce(f"❌ Failed to create or update {obj_type_label} '{obj_name}': {e}")
725756

726757
def model_attribute(model: str, attribute: str) -> str:
727758

setup/steps/09_deploy_via_modelservice.py

Lines changed: 82 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import sys
55
from pathlib import Path
66

7+
import pykube
8+
79
# Add project root to path for imports
810
current_file = Path(__file__).resolve()
911
project_root = current_file.parents[1]
@@ -30,7 +32,9 @@
3032
add_additional_env_to_yaml,
3133
add_config,
3234
clear_string,
33-
install_wva_components
35+
install_wva_components,
36+
kube_connect,
37+
create_httproute
3438
)
3539

3640

@@ -123,9 +127,6 @@ def generate_ms_values_yaml(
123127

124128
# Routing section
125129
service_port = ev.get("vllm_common_inference_port", "8000")
126-
release = ev.get("vllm_modelservice_release", "")
127-
route_enabled = ev.get("vllm_modelservice_route", "false")
128-
model_id = ev.get("deploy_current_model_id", "")
129130
model_id_label = ev.get("deploy_current_model_id_label", "")
130131

131132
# Image details
@@ -146,10 +147,6 @@ def generate_ms_values_yaml(
146147
proxy_connector = ev.get("llmd_routingsidecar_connector", "")
147148
proxy_debug_level = ev.get("llmd_routingsidecar_debug_level", "")
148149

149-
# EPP and routing configuration
150-
inference_pool_create = ev.get("vllm_modelservice_inference_pool", "true")
151-
epp_create = ev.get("vllm_modelservice_epp", "true")
152-
153150
# Decode configuration
154151
decode_replicas = int(ev.get("vllm_modelservice_decode_replicas", "0"))
155152
decode_create = "true" if decode_replicas > 0 else "false"
@@ -250,11 +247,6 @@ def generate_ms_values_yaml(
250247
# Environment variables to YAML
251248
envvars_to_yaml = ev.get("vllm_common_envvars_to_yaml", "")
252249

253-
# Read the rules file content
254-
rules_content = ""
255-
if rules_file.exists():
256-
rules_content = rules_file.read_text().rstrip()
257-
258250
# Build decode resources section cleanly
259251
decode_limits_resources = []
260252
decode_requests_resources = []
@@ -376,46 +368,17 @@ def generate_ms_values_yaml(
376368
size: {model_size}
377369
authSecretName: "llm-d-hf-token"
378370
name: {model_name}
371+
labels:
372+
llm-d.ai/inferenceServing: "true"
373+
llm-d.ai/model: {model_id_label}
379374
380375
routing:
381376
servicePort: {service_port}
382-
parentRefs:
383-
- group: gateway.networking.k8s.io
384-
kind: Gateway
385-
name: infra-{release}-inference-gateway
386377
proxy:
387378
image: "{proxy_image}"
388379
secure: false
389380
connector: {proxy_connector}
390381
debugLevel: {proxy_debug_level}
391-
inferencePool:
392-
create: {inference_pool_create}
393-
name: {model_id_label}-gaie
394-
httpRoute:
395-
create: {route_enabled}
396-
rules:
397-
- backendRefs:
398-
- group: inference.networking.x-k8s.io
399-
kind: InferencePool
400-
name: {model_id_label}-gaie
401-
port: 8000
402-
weight: 1
403-
timeouts:
404-
backendRequest: 0s
405-
request: 0s
406-
matches:
407-
- path:
408-
type: PathPrefix
409-
value: /{model_id}/
410-
filters:
411-
- type: URLRewrite
412-
urlRewrite:
413-
path:
414-
type: ReplacePrefixMatch
415-
replacePrefixMatch: /
416-
{rules_content}
417-
epp:
418-
create: {epp_create}
419382
420383
decode:
421384
create: {decode_create}
@@ -538,6 +501,72 @@ def generate_ms_values_yaml(
538501

539502
return clear_string(yaml_content)
540503

504+
def define_httproute(
    ev: dict,
    single_model: bool = True
) -> str:
    """Build an HTTPRoute manifest routing gateway traffic to a model's InferencePool.

    The route always contains one path-prefixed rule: requests under
    /{model_id_label}/ are rewritten (prefix stripped) and sent to the
    {model_id_label}-gaie InferencePool. When only a single model is
    deployed, a second, match-less (catch-all) rule is appended so
    un-prefixed traffic also reaches that pool.

    Args:
        ev: Environment variables dictionary.
        single_model: indicates only one model will be deployed.

    Returns:
        YAML manifest for the HTTPRoute as a string.

    Raises:
        KeyError: if "vllm_modelservice_release" is missing from ev.
    """
    release = ev["vllm_modelservice_release"]
    namespace = ev.get("vllm_common_namespace", "")
    model_id_label = ev.get("deploy_current_model_id_label", "")
    service_port = ev.get("vllm_common_inference_port", "8000")

    manifest = f"""apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: {model_id_label}
  namespace: {namespace}
spec:
  parentRefs:
  - group: gateway.networking.k8s.io
    kind: Gateway
    name: infra-{release}-inference-gateway
  rules:
  - backendRefs:
    - group: inference.networking.x-k8s.io
      kind: InferencePool
      name: {model_id_label}-gaie
      port: {service_port}
      weight: 1
    timeouts:
      backendRequest: 0s
      request: 0s
    matches:
    - path:
        type: PathPrefix
        value: /{model_id_label}/
    filters:
    - type: URLRewrite
      urlRewrite:
        path:
          type: ReplacePrefixMatch
          replacePrefixMatch: /
"""
    if single_model:
        # Catch-all rule (no `matches`, so it matches "/" by default): with
        # only one model deployed, all remaining traffic goes to its pool.
        manifest += f"""  - backendRefs:
    - group: inference.networking.x-k8s.io
      kind: InferencePool
      name: {model_id_label}-gaie
      port: {service_port}
      weight: 1
    timeouts:
      backendRequest: 0s
      request: 0s
"""
    return manifest
569+
541570
def main():
542571
"""Main function for step 09 - Deploy via modelservice"""
543572

@@ -632,22 +661,7 @@ def main():
632661

633662
# Generate ms-rules.yaml content
634663
rules_file = helm_dir / "ms-rules.yaml"
635-
636-
# For single model, write routing rule; otherwise empty
637-
if len([m for m in model_list if m.strip()]) == 1:
638-
rules_content = f"""- backendRefs:
639-
- group: inference.networking.x-k8s.io
640-
kind: InferencePool
641-
name: {ev["deploy_current_model_id_label"]}-gaie
642-
port: 8000
643-
weight: 1
644-
timeouts:
645-
backendRequest: 0s
646-
request: 0s
647-
"""
648-
rules_file.write_text(rules_content)
649-
else:
650-
rules_file.write_text("")
664+
rules_file.write_text("")
651665

652666
# Generate ms-values.yaml
653667
values_content = generate_ms_values_yaml(ev, mount_model_volume, rules_file)
@@ -681,6 +695,13 @@ def main():
681695
f"✅ {ev['vllm_common_namespace']}-{ev['deploy_current_model_id_label']}-ms helm chart deployed successfully"
682696
)
683697

698+
if ev.get("vllm_modelservice_route", "false"):
699+
announce(f"🚀 Creating HTTPRoute")
700+
api, client = kube_connect(f'{ev["control_work_dir"]}/environment/context.ctx')
701+
httproute_spec = define_httproute(ev, single_model = len([m for m in model_list if m.strip()]) == 1)
702+
announce(f"Creating HTTPRoute: \n{httproute_spec}")
703+
create_httproute(api, httproute_spec, ev["control_dry_run"], ev["control_verbose"])
704+
684705
# Wait for decode pods creation
685706
result = wait_for_pods_creation(
686707
ev, ev["vllm_modelservice_decode_replicas"], "decode"

0 commit comments

Comments
 (0)