
Commit cf7f2be

fix multi-gpu llm tests
- Use an MoE model (deepseek-ai/DeepSeek-V2-Lite) because vllm-project/vllm#30739 changes how vLLM handles DP ranks: it overrides dp_size=1 and dp_rank=0 for non-MoE models.
- Fixes doc/source/llm/doc_code/serve/multi_gpu/dp_basic_example.py and doc/source/llm/doc_code/serve/multi_gpu/dp_pd_example.py.

Signed-off-by: Nikhil Ghosh <nikhil@anyscale.com>
1 parent 85b904b commit cf7f2be

2 files changed: +3 −3 lines changed


doc/source/llm/doc_code/serve/multi_gpu/dp_basic_example.py

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ def _testing_build_dp_openai_app(builder_config, **kwargs):
     # Configure the model with data parallel settings
     config = LLMConfig(
         model_loading_config={
-            "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
+            "model_id": "deepseek-ai/DeepSeek-V2-Lite"
         },
         engine_kwargs={
             "data_parallel_size": 2,  # Number of DP replicas

doc/source/llm/doc_code/serve/multi_gpu/dp_pd_example.py

Lines changed: 2 additions & 2 deletions
@@ -57,7 +57,7 @@ def _testing_build_dp_deployment(llm_config, **kwargs):
     # Configure prefill with data parallel attention
     prefill_config = LLMConfig(
         model_loading_config={
-            "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
+            "model_id": "deepseek-ai/DeepSeek-V2-Lite"
         },
         engine_kwargs={
             "data_parallel_size": 2,  # 2 DP replicas for prefill
@@ -78,7 +78,7 @@ def _testing_build_dp_deployment(llm_config, **kwargs):
     # Configure decode with data parallel attention
     decode_config = LLMConfig(
         model_loading_config={
-            "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
+            "model_id": "deepseek-ai/DeepSeek-V2-Lite"
         },
         engine_kwargs={
             "data_parallel_size": 2,  # 2 DP replicas for decode (adjusted for 4 GPU limit)
