Skip to content

Commit 4e08e03

Browse files
test efa and multinode
1 parent 4553623 commit 4e08e03

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

test/vllm/ec2/test_artifacts/test_ec2.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,6 @@ def create_benchmark_command(model_name: str) -> str:
5555
"""
5656

5757

58-
def create_serve_command(model_name: str) -> str:
59-
return f"""vllm serve {model_name} \
60-
--tensor-parallel-size 8 \
61-
--pipeline-parallel-size 2 \
62-
--max-num-batched-tokens 16384"""
63-
64-
6558
def get_secret_hf_token():
6659

6760
secret_name = "test/hf_token"
@@ -163,9 +156,16 @@ def test_vllm_benchmark_on_multi_node(head_connection, worker_connection, image_
163156
head_container_id = get_container_id(head_connection, image_uri)
164157
print("Starting model serving inside Ray container...")
165158

166-
serve_cmd = create_serve_command(model_name)
167-
serve_in_container = f"tmux new-session -d -s ray_head 'docker exec -it {head_container_id} /bin/bash -c \"{serve_cmd}\"'"
168-
head_connection.run(serve_in_container)
159+
commands_serving = [
160+
"tmux new-session -d -s vllm_serve",
161+
"tmux ls",
162+
"tmux attach-session -t vllm_serve",
163+
f'docker exec -it {head_container_id} /bin/bash -c "vllm serve {model_name} \
164+
--tensor-parallel-size 8 \
165+
--pipeline-parallel-size 2 \
166+
--max-num-batched-tokens 16384"',
167+
]
168+
head_connection.run("; ".join(commands_serving))
169169

170170
print("Waiting for model to load (15 minutes)...")
171171
time.sleep(1000)

0 commit comments

Comments
 (0)