@@ -4,6 +4,8 @@
 import sys
 from pathlib import Path
 
+import pykube
+
 # Add project root to path for imports
 current_file = Path(__file__).resolve()
 project_root = current_file.parents[1]
@@ -30,7 +32,9 @@
     add_additional_env_to_yaml,
     add_config,
     clear_string,
-    install_wva_components
+    install_wva_components,
+    kube_connect,
+    create_httproute
 )
 
 
@@ -123,9 +127,6 @@ def generate_ms_values_yaml(
 
     # Routing section
     service_port = ev.get("vllm_common_inference_port", "8000")
-    release = ev.get("vllm_modelservice_release", "")
-    route_enabled = ev.get("vllm_modelservice_route", "false")
-    model_id = ev.get("deploy_current_model_id", "")
     model_id_label = ev.get("deploy_current_model_id_label", "")
 
     # Image details
@@ -146,10 +147,6 @@ def generate_ms_values_yaml(
     proxy_connector = ev.get("llmd_routingsidecar_connector", "")
     proxy_debug_level = ev.get("llmd_routingsidecar_debug_level", "")
 
-    # EPP and routing configuration
-    inference_pool_create = ev.get("vllm_modelservice_inference_pool", "true")
-    epp_create = ev.get("vllm_modelservice_epp", "true")
-
     # Decode configuration
     decode_replicas = int(ev.get("vllm_modelservice_decode_replicas", "0"))
     decode_create = "true" if decode_replicas > 0 else "false"
@@ -250,11 +247,6 @@ def generate_ms_values_yaml(
     # Environment variables to YAML
     envvars_to_yaml = ev.get("vllm_common_envvars_to_yaml", "")
 
-    # Read the rules file content
-    rules_content = ""
-    if rules_file.exists():
-        rules_content = rules_file.read_text().rstrip()
-
     # Build decode resources section cleanly
     decode_limits_resources = []
     decode_requests_resources = []
@@ -376,46 +368,17 @@ def generate_ms_values_yaml(
   size: {model_size}
   authSecretName: "llm-d-hf-token"
   name: {model_name}
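+  # Assumption: these labels are what the externally created InferencePool selects on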
+  labels:
+    llm-d.ai/inferenceServing: "true"
+    llm-d.ai/model: {model_id_label}
 
 routing:
   servicePort: {service_port}
-  parentRefs:
-    - group: gateway.networking.k8s.io
-      kind: Gateway
-      name: infra-{release}-inference-gateway
   proxy:
     image: "{proxy_image}"
     secure: false
     connector: {proxy_connector}
     debugLevel: {proxy_debug_level}
-  inferencePool:
-    create: {inference_pool_create}
-    name: {model_id_label}-gaie
-  httpRoute:
-    create: {route_enabled}
-    rules:
-      - backendRefs:
-          - group: inference.networking.x-k8s.io
-            kind: InferencePool
-            name: {model_id_label}-gaie
-            port: 8000
-            weight: 1
-        timeouts:
-          backendRequest: 0s
-          request: 0s
-        matches:
-          - path:
-              type: PathPrefix
-              value: /{model_id}/
-        filters:
-          - type: URLRewrite
-            urlRewrite:
-              path:
-                type: ReplacePrefixMatch
-                replacePrefixMatch: /
-{rules_content}
-  epp:
-    create: {epp_create}
 
 decode:
   create: {decode_create}
@@ -538,6 +501,72 @@ def generate_ms_values_yaml(
 
     return clear_string(yaml_content)
 
+def define_httproute(
+    ev: dict,
+    single_model: bool = True
+) -> str:
+    """
+    Generate the HTTPRoute manifest that attaches the model's
+    InferencePool to the inference gateway.
+
+    Args:
+        ev: Environment variables dictionary
+        single_model: indicates only one model will be deployed
+
+    Returns:
+        YAML manifest for HTTPRoute
+    """
+    release = ev["vllm_modelservice_release"]
+    namespace = ev.get("vllm_common_namespace", "")
+    model_id_label = ev.get("deploy_current_model_id_label", "")
+    service_port = ev.get("vllm_common_inference_port", "8000")
+
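+    # Base manifest: one path-prefixed rule that strips the model prefix via URLRewrite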
+    manifest = f"""apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: {model_id_label}
+  namespace: {namespace}
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: infra-{release}-inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: {model_id_label}-gaie
+      port: {service_port}
+      weight: 1
+    timeouts:
+      backendRequest: 0s
+      request: 0s
+    matches:
+    - path:
+        type: PathPrefix
+        value: /{model_id_label}/
+    filters:
+    - type: URLRewrite
+      urlRewrite:
+        path:
+          type: ReplacePrefixMatch
+          replacePrefixMatch: /
+"""
+    # For a single model, also append a catch-all rule with no path match or rewrite
+    if single_model:
+        manifest += f"""  - backendRefs:
+    - group: inference.networking.x-k8s.io
+      kind: InferencePool
+      name: {model_id_label}-gaie
+      port: {service_port}
+      weight: 1
+    timeouts:
+      backendRequest: 0s
+      request: 0s
+"""
+    return manifest
+
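+# A minimal sketch of what define_httproute() yields (hypothetical values):
+#
+#   spec = define_httproute(
+#       {"vllm_modelservice_release": "demo",
+#        "vllm_common_namespace": "llm-d",
+#        "deploy_current_model_id_label": "llama-3-2-1b"},
+#       single_model=False)
+#
+# produces an HTTPRoute named "llama-3-2-1b" in namespace "llm-d", attached to
+# Gateway "infra-demo-inference-gateway", routing "/llama-3-2-1b/..." to
+# InferencePool "llama-3-2-1b-gaie" with the prefix stripped before proxying.
+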
 def main():
     """Main function for step 09 - Deploy via modelservice"""
 
@@ -632,22 +661,7 @@ def main():
 
     # Generate ms-rules.yaml content
     rules_file = helm_dir / "ms-rules.yaml"
-
-    # For single model, write routing rule; otherwise empty
-    if len([m for m in model_list if m.strip()]) == 1:
-        rules_content = f"""- backendRefs:
-  - group: inference.networking.x-k8s.io
-    kind: InferencePool
-    name: {ev["deploy_current_model_id_label"]}-gaie
-    port: 8000
-    weight: 1
-  timeouts:
-    backendRequest: 0s
-    request: 0s
-"""
-        rules_file.write_text(rules_content)
-    else:
-        rules_file.write_text("")
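+    # HTTPRoute rules are now generated by define_httproute(); keep the rules file empty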
+    rules_file.write_text("")
 
     # Generate ms-values.yaml
     values_content = generate_ms_values_yaml(ev, mount_model_volume, rules_file)
@@ -681,6 +695,13 @@ def main():
         f"✅ {ev['vllm_common_namespace']}-{ev['deploy_current_model_id_label']}-ms helm chart deployed successfully"
     )
 
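+    # Optionally attach the model to the inference gateway via an HTTPRoute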
+    if ev.get("vllm_modelservice_route", "false") == "true":
+        announce("🚀 Creating HTTPRoute")
+        api, client = kube_connect(f'{ev["control_work_dir"]}/environment/context.ctx')
+        httproute_spec = define_httproute(ev, single_model=len([m for m in model_list if m.strip()]) == 1)
+        announce(f"Creating HTTPRoute:\n{httproute_spec}")
+        create_httproute(api, httproute_spec, ev["control_dry_run"], ev["control_verbose"])
+
     # Wait for decode pods creation
     result = wait_for_pods_creation(
         ev, ev["vllm_modelservice_decode_replicas"], "decode"