@@ -46,7 +46,7 @@ def create_benchmark_command() -> str:
     """Create command for running benchmark"""
     return f"""
 python3 /fsx/vllm-dlc/vllm/benchmarks/benchmark_serving.py \
-    --model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
+    --model deepseek-ai/DeepSeek-R1-Distill-Qwen-32B \
     --backend vllm \
     --base-url "http://localhost:8000" \
     --endpoint '/v1/completions' \
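
For reference, a minimal sketch of invoking the same benchmark script directly, using only the paths and flags visible in this hunk; run_benchmark is a hypothetical helper, not part of this change, and any further benchmark_serving.py flags are omitted rather than guessed:

import subprocess

# Hypothetical helper (not in this diff): runs benchmark_serving.py with
# only the flags confirmed by the hunk above.
def run_benchmark(model: str = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B") -> None:
    subprocess.run(
        [
            "python3", "/fsx/vllm-dlc/vllm/benchmarks/benchmark_serving.py",
            "--model", model,
            "--backend", "vllm",
            "--base-url", "http://localhost:8000",
            "--endpoint", "/v1/completions",
        ],
        check=True,  # raise if the benchmark exits non-zero
    )
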
@@ -145,8 +145,6 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_
     # add timer to let container run
     time.sleep(30)
 
-    serve_command = f"vllm serve {MODEL_NAME} --tensor-parallel-size 8 --pipeline-parallel-size 2 --max-num-batched-tokens 16384"
-
     commands = ["ray status", "fi_info -p efa"]
     for command in commands:
         head_connection.run(f"docker exec -i {container_name} /bin/bash -c '{command}'")
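
The test below relies on a wait_for_container_ready helper whose body is not shown in this diff. A plausible sketch, assuming a Fabric-style connection whose run() accepts warn=True and returns a result with an .ok attribute, and that vLLM's OpenAI-compatible server exposes /v1/models on port 8000:

import time

# Sketch only: the real helper's implementation is not part of this diff.
def wait_for_container_ready(connection, container_name: str, timeout: int = 2000) -> bool:
    """Poll until the vLLM server inside the container answers on port 8000."""
    probe = f"docker exec -i {container_name} curl -sf http://localhost:8000/v1/models"
    deadline = time.time() + timeout
    while time.time() < deadline:
        if connection.run(probe, warn=True).ok:  # warn=True: tolerate non-zero exits
            return True
        time.sleep(30)
    return False
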
@@ -160,6 +158,8 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_
     if not wait_for_container_ready(head_connection, container_name, timeout=2000):
         raise Exception("Container failed to become ready within timeout period")
 
+    time.sleep(100)
+
     print("Running benchmark...")
     benchmark_cmd = "source vllm_env/bin/activate &&" + create_benchmark_command()
     benchmark_result = head_connection.run(benchmark_cmd, timeout=7200)
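
The added time.sleep(100) is a fixed grace period between the readiness check and the benchmark. One illustrative alternative (an assumption, not part of this change) is a warm-up completion request that only proceeds once the model actually serves a token:

import time

# Hypothetical warm-up (not in this diff): send one tiny completion request
# through the container and wait for it to succeed before benchmarking.
def warm_up(connection, container_name: str, model: str, attempts: int = 10) -> bool:
    payload = '{"model": "%s", "prompt": "hi", "max_tokens": 1}' % model
    probe = (
        f"docker exec -i {container_name} curl -sf http://localhost:8000/v1/completions "
        f"-H 'Content-Type: application/json' -d '{payload}'"
    )
    for _ in range(attempts):
        if connection.run(probe, warn=True).ok:
            return True
        time.sleep(10)
    return False
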
@@ -397,7 +397,7 @@ def test_vllm_on_ec2(resources, image_uri):
 
     print("\n=== Test Summary ===")
     print(f"EFA tests: {'Passed' if test_results['efa'] else 'Not Run/Failed'}")
-    # print(f"Single-node test: {'Passed' if test_results['single_node'] else 'Failed'}")
+    print(f"Single-node test: {'Passed' if test_results['single_node'] else 'Failed'}")
     print(f"Multi-node test: {'Passed' if test_results['multi_node'] else 'Failed'}")
 
     if not any(test_results.values()):