@@ -73,7 +73,7 @@ def main():
7373 yamls_dir .mkdir (parents = True , exist_ok = True )
7474
7575 # Process each model - First pass: Deploy resources
76- model_list = ev . get ( "deploy_model_list" , "" ) .replace ("," , " " ).split ()
76+ model_list = ev [ "deploy_model_list" ] .replace ("," , " " ).split ()
7777 for model in model_list :
7878 # Generate filename-safe model name
7979 modelfn = model .replace ("/" , "___" )
@@ -107,6 +107,17 @@ def main():
107107 kubectl_service_cmd = f"{ ev ['control_kcmd' ]} apply -f { service_file } "
108108 llmdbench_execute_cmd (actual_cmd = kubectl_service_cmd , dry_run = ev ["control_dry_run" ], verbose = ev ["control_verbose" ], fatal = True )
109109
110+ # Optional PodMonitor for Prometheus scraping
111+ if ev ["vllm_monitoring_podmonitor_enabled" ] == "true" :
112+ podmonitor_yaml = generate_podmonitor_yaml (ev , model , model_label )
113+ podmonitor_file = yamls_dir / f"{ ev ['current_step' ]} _c_podmonitor_{ modelfn } .yaml"
114+ with open (podmonitor_file , 'w' ) as f :
115+ f .write (podmonitor_yaml )
116+
117+ kubectl_podmonitor_cmd = f"{ ev ['control_kcmd' ]} apply -f { podmonitor_file } "
118+ llmdbench_execute_cmd (actual_cmd = kubectl_podmonitor_cmd , dry_run = ev ["control_dry_run" ], verbose = ev ["control_verbose" ], fatal = False )
119+ announce (f"📊 PodMonitor for \" { model } \" created for Prometheus scraping" )
120+
110121 # Optional HTTPRoute for OpenShift
111122 srl = "deployment,service,pods,secrets"
112123 if ev ["control_deploy_is_openshift" ] == "1" :
@@ -169,7 +180,7 @@ def main():
169180 propagate_standup_parameters (ev , api )
170181
171182 else :
172- deploy_methods = ev . get ( "deploy_methods" , "" )
183+ deploy_methods = ev [ "deploy_methods" ]
173184 announce (f"⏭️ Environment types are \" { deploy_methods } \" . Skipping this step." )
174185
175186 return 0
@@ -254,11 +265,12 @@ def generate_deployment_yaml(ev, model, model_label):
254265 - name: HUGGING_FACE_HUB_TOKEN
255266 valueFrom:
256267 secretKeyRef:
257- name: { ev . get ( 'vllm_common_hf_token_name' , '' ) }
268+ name: { ev [ 'vllm_common_hf_token_name' ] }
258269 key: HF_TOKEN
259270{ additional_env }
260271 ports:
261272 - containerPort: { ev ['vllm_common_inference_port' ]}
273+ name: metrics
262274 startupProbe:
263275 httpGet:
264276 path: { ev ["vllm_standalone_startup_probe_path" ]}
@@ -309,7 +321,7 @@ def generate_deployment_yaml(ev, model, model_label):
309321 - name: HUGGING_FACE_HUB_TOKEN
310322 valueFrom:
311323 secretKeyRef:
312- name: { ev . get ( 'vllm_common_hf_token_name' , '' ) }
324+ name: { ev [ 'vllm_common_hf_token_name' ] }
313325 key: HF_TOKEN
314326{ additional_env }
315327 ports:
@@ -382,11 +394,34 @@ def generate_service_yaml(ev, model, model_label):
382394"""
383395 return service_yaml
384396
def generate_podmonitor_yaml(ev, model: str, model_label: str) -> str:
    """Build a Prometheus Operator PodMonitor manifest for one standalone vLLM model.

    The manifest selects pods labelled ``app: vllm-standalone-<model_label>``
    and scrapes the container port named ``metrics``.

    Args:
        ev: Mapping of environment/config values. The keys read here are
            ``vllm_common_namespace``, ``control_username``, ``deploy_methods``,
            ``vllm_monitoring_metrics_path`` and
            ``vllm_monitoring_scrape_interval``.
        model: Model identifier. Not used by this function; kept so the
            signature matches the other ``generate_*_yaml`` helpers.
        model_label: Label-safe model name used in the resource name and in
            the pod selector.

    Returns:
        The PodMonitor manifest as a YAML string ending with a newline.

    Raises:
        KeyError: If ``ev`` is a plain mapping and one of the keys above is
            missing (lookups use ``ev[...]``, not ``ev.get``).
    """
    # NOTE: the template lines start at column 0 on purpose -- YAML nesting is
    # carried by the leading spaces inside the string, not by Python indentation.
    podmonitor_yaml = f"""apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: vllm-standalone-{model_label}
  namespace: {ev['vllm_common_namespace']}
  labels:
    stood-up-by: "{ev['control_username']}"
    stood-up-from: llm-d-benchmark
    stood-up-via: "{ev['deploy_methods']}"
spec:
  selector:
    matchLabels:
      app: vllm-standalone-{model_label}
  podMetricsEndpoints:
  - port: metrics
    path: {ev['vllm_monitoring_metrics_path']}
    interval: {ev['vllm_monitoring_scrape_interval']}
"""
    return podmonitor_yaml
419+
385420def generate_httproute_yaml (ev , model , model_label ):
386421 """Generate HTTPRoute YAML for vLLM standalone model."""
387422
388423 # Extract cluster URL for hostname
389- cluster_url = ev . get ( "cluster_url" , "" ) .replace ("https://api." , "" )
424+ cluster_url = ev [ "cluster_url" ] .replace ("https://api." , "" )
390425
391426 # Get model attributes for backend reference
392427 model_parameters = model_attribute (model , "parameters" )
0 commit comments