@@ -291,7 +291,7 @@ def generate_ms_values_yaml(
291291 readinessProbe:
292292 httpGet:
293293 path: /health
294- port: { decode_inference_port }
294+ port: { common_inference_port }
295295 failureThreshold: 3
296296 periodSeconds: 5
297297 { add_config (decode_extra_container_config , 6 ).lstrip ()}
@@ -351,7 +351,7 @@ def generate_ms_values_yaml(
351351 readinessProbe:
352352 httpGet:
353353 path: /health
354- port: { prefill_inference_port }
354+ port: { common_inference_port }
355355 failureThreshold: 3
356356 periodSeconds: 5
357357 { add_config (prefill_extra_container_config , 6 ).lstrip ()}
@@ -561,15 +561,21 @@ def main():
561561
562562 # Wait for decode pods to be created, running, and ready
563563 api_client = client .CoreV1Api ()
564+ expected_num_decode_pods = ev ["vllm_modelservice_decode_replicas" ]
565+ if ev .get ("vllm_modelservice_multinode" , "false" ).lower () == "true" :
566+ expected_num_decode_pods = int (ev .get ("vllm_modelservice_decode_num_workers_parallelism" , "1" )) * int (expected_num_decode_pods )
564567 result = wait_for_pods_created_running_ready (
565- api_client , ev , ev [ "vllm_modelservice_decode_replicas" ] , "decode"
568+ api_client , ev , expected_num_decode_pods , "decode"
566569 )
567570 if result != 0 :
568571 return result
569572
570573 # Wait for prefill pods to be created, running, and ready
574+ expected_num_prefill_pods = ev ["vllm_modelservice_prefill_replicas" ]
575+ if ev .get ("vllm_modelservice_multinode" , "false" ).lower () == "true" :
576+ expected_num_prefill_pods = int (ev .get ("vllm_modelservice_prefill_num_workers_parallelism" , "1" )) * int (expected_num_prefill_pods )
571577 result = wait_for_pods_created_running_ready (
572- api_client , ev , ev [ "vllm_modelservice_prefill_replicas" ] , "prefill"
578+ api_client , ev , expected_num_prefill_pods , "prefill"
573579 )
574580 if result != 0 :
575581 return result
0 commit comments