Commit 8fc4c14

format logs
1 parent 22ffd7f

2 files changed: +66 −30 lines changed


test/vllm/ec2/utils/run_vllm_on_arm64.sh

Lines changed: 22 additions & 9 deletions
@@ -51,7 +51,8 @@ handle_error() {
 trap cleanup EXIT
 trap 'handle_error $LINENO' ERR
 
-echo "Running initial inference check..."
+echo "####################### RUNNING INFERENCE CHECK ########################################"
+
 docker run --rm \
     -v /fsx/vllm-dlc/vllm:/vllm \
     --entrypoint /bin/bash \
@@ -67,7 +68,8 @@ docker run --rm \
     --tensor-parallel-size 1 \
     --max-model-len 2048"
 
-echo "Starting VLLM server..."
+echo "####################### Starting VLLM server ##########################################"
+
 docker run -d \
     -v /fsx/vllm-dlc/vllm:/vllm \
     --name ${CONTAINER_NAME} \
@@ -89,16 +91,27 @@ docker run -d \
 wait_for_api
 docker logs "${CONTAINER_NAME}"
 
-echo "VLLM server is running and responding to requests!"
+echo "####################### BENCHMARKING ################################################"
+
+python3 /fsx/vllm-dlc/vllm/benchmarks/benchmark_serving.py \
+    --backend vllm \
+    --base-url "http://localhost:8000" \
+    --endpoint '/v1/completions' \
+    --model $MODEL_NAME \
+    --dataset-name random \
+    --random-input-len 128 \
+    --random-output-len 128 \
+    --num-prompts 50 \
+    --max-concurrency 4 \
+    --temperature 0.7
+
+echo "####################### TESTING TOOL CALLS (OPEN AI API) ###########################"
 
-echo "Installing Python dependencies..."
 python -m venv .venv
 source .venv/bin/activate
 
-pip install "openai>=1.0.0" autogen-agentchat~=0.2 autogen ag2[openai] pyautogen
-
-echo "Running agent tests..."
+pip install "openai>=1.0.0"
 python3 test_agents.py
-echo "Testing completed successfully!"
+deactivate
 
-deactivate
+echo "####################### Testing completed successfully ###########################"

test/vllm/ec2/utils/test_agents.py

Lines changed: 44 additions & 21 deletions
@@ -1,21 +1,44 @@
-from autogen import UserProxyAgent, ConversableAgent
-
-local_llm_config = {
-    "config_list": [
-        {
-            "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",  # Same as in vLLM command
-            "api_key": "NotRequired",  # Not needed
-            "base_url": "http://localhost:8000/v1",  # Your vLLM URL, with '/v1' added
-        }
-    ],
-    "cache_seed": None,  # Turns off caching, useful for testing different models
-}
-
-# Create the agent that uses the LLM.
-assistant = ConversableAgent("agent", llm_config=local_llm_config, system_message="")
-
-# Create the agent that represents the user in the conversation.
-user_proxy = UserProxyAgent("user", code_execution_config=False, system_message="")
-
-# Let the assistant start the conversation. It will end when the user types exit.
-assistant.initiate_chat(user_proxy, message="How can I help you today?")
+from openai import OpenAI
+import json
+
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
+
+
+def get_weather(location: str, unit: str):
+    return f"Getting the weather for {location} in {unit}..."
+
+
+tool_functions = {"get_weather": get_weather}
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and state, e.g., 'San Francisco, CA'",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location", "unit"],
+            },
+        },
+    }
+]
+
+response = client.chat.completions.create(
+    model=client.models.list().data[0].id,
+    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
+    tools=tools,
+    tool_choice="auto",
+)
+
+tool_call = response.choices[0].message.tool_calls[0].function
+print(f"Function called: {tool_call.name}")
+print(f"Arguments: {tool_call.arguments}")
+print(f"Result: {tool_functions[tool_call.name](**json.loads(tool_call.arguments))}")
