@@ -150,7 +150,9 @@ def generate_ms_values_yaml(
150150 decode_replicas = int (ev .get ("vllm_modelservice_decode_replicas" , "0" ))
151151 decode_create = "true" if decode_replicas > 0 else "false"
152152 decode_data_parallelism = ev .get ("vllm_modelservice_decode_data_parallelism" , "1" )
153- decode_tensor_parallelism = ev ["vllm_modelservice_decode_tensor_parallelism" ]
153+ decode_data_local_parallelism = ev .get ("vllm_modelservice_decode_data_local_parallelism" , "1" )
154+ decode_tensor_parallelism = ev .get ("vllm_modelservice_decode_tensor_parallelism" , "1" )
155+ decode_workers_parallelism = ev .get ("vllm_modelservice_decode_worker_parallelism" , "1" )
154156 decode_model_command = ev .get ("vllm_modelservice_decode_model_command" , "" )
155157 decode_extra_args = ev .get ("vllm_modelservice_decode_extra_args" , "" )
156158 decode_inference_port = ev ["vllm_modelservice_decode_inference_port" ]
@@ -159,9 +161,9 @@ def generate_ms_values_yaml(
159161 prefill_replicas = int (ev .get ("vllm_modelservice_prefill_replicas" , "0" ))
160162 prefill_create = "true" if prefill_replicas > 0 else "false"
161163 prefill_data_parallelism = ev .get ("vllm_modelservice_prefill_data_parallelism" , "1" )
162- prefill_tensor_parallelism = ev .get (
163- "vllm_modelservice_prefill_tensor_parallelism" , "1"
164- )
164+ prefill_data_local_parallelism = ev .get ("vllm_modelservice_prefill_data_local_parallelism" , "1" )
165+ prefill_tensor_parallelism = ev . get ( "vllm_modelservice_prefill_tensor_parallelism" , "1" )
166+ prefill_workers_parallelism = ev . get ( "vllm_modelservice_prefill_worker_parallelism" , "1" )
165167 prefill_model_command = ev .get ("vllm_modelservice_prefill_model_command" , "" )
166168 prefill_extra_args = ev .get ("vllm_modelservice_prefill_extra_args" , "" )
167169 prefill_inference_port = ev ["vllm_modelservice_prefill_inference_port" ]
@@ -245,7 +247,9 @@ def generate_ms_values_yaml(
245247{ add_affinity (ev )}
246248 parallelism:
247249 data: { decode_data_parallelism }
250+ dataLocal: { decode_data_local_parallelism }
248251 tensor: { decode_tensor_parallelism }
252+ workers: { decode_workers_parallelism }
249253 annotations:
250254 { add_annotations ("LLMDBENCH_VLLM_COMMON_ANNOTATIONS" ).lstrip ()}
251255 podAnnotations:
@@ -300,7 +304,9 @@ def generate_ms_values_yaml(
300304{ add_affinity (ev )}
301305 parallelism:
302306 data: { prefill_data_parallelism }
307+ dataLocal: { prefill_data_local_parallelism }
303308 tensor: { prefill_tensor_parallelism }
309+ workers: { prefill_workers_parallelism }
304310 annotations:
305311 { add_annotations ("LLMDBENCH_VLLM_COMMON_ANNOTATIONS" ).lstrip ()}
306312 podAnnotations:
0 commit comments