Commit 02906da

test vllm with autogen
1 parent ebe45d9 commit 02906da

File tree

2 files changed: +25 -90 lines changed

test/vllm/ec2/utils/run_vllm_on_arm64.sh

Lines changed: 4 additions & 3 deletions

@@ -69,6 +69,7 @@ docker run --rm \
 
 echo "Starting VLLM server..."
 docker run -d \
+    -v /fsx/vllm-dlc/vllm:/vllm \
     --name ${CONTAINER_NAME} \
     -p ${PORT}:8000 \
     --entrypoint /bin/bash \
@@ -82,7 +83,8 @@ docker run -d \
     --dtype float16 \
     --gpu-memory-utilization 0.7 \
     --max-model-len 6000 \
-    --enforce-eager"
+    --enforce-eager
+    --chat-template /vllm/examples/tool_chat_template_deepseekr1.jinja"
 
 wait_for_api
 docker logs "${CONTAINER_NAME}"
@@ -93,8 +95,7 @@ echo "Installing Python dependencies..."
 python -m venv .venv
 source .venv/bin/activate
 
-pip install openai
-pip install strands-agents strands-agents-tools
+pip install autogen-agentchat~=0.2
 
 echo "Running agent tests..."
 python3 test_agents.py
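
For context, the added -v mount makes the checkout at /fsx/vllm-dlc/vllm available as /vllm inside the container, which is where the new --chat-template flag picks up DeepSeek-R1's tool-calling template, and the dependency swap replaces the Strands test stack with autogen-agentchat~=0.2. A minimal smoke test of the resulting OpenAI-compatible endpoint could look like the sketch below; it uses only the standard library (the commit drops the openai package), and everything other than the base URL and model name taken from the diff is an assumption, not part of this commit.

import json
import urllib.request

# Values taken from the script above; the prompt and token limit are illustrative.
BASE_URL = "http://localhost:8000/v1"
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

payload = {
    "model": MODEL_NAME,
    "messages": [{"role": "user", "content": "Reply with the single word: ready"}],
    "max_tokens": 32,
}

request = urllib.request.Request(
    f"{BASE_URL}/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

# The --chat-template flag is applied server-side, so a well-formed assistant
# message in the response confirms the template was accepted at startup.
with urllib.request.urlopen(request) as response:
    body = json.load(response)

print(body["choices"][0]["message"]["content"])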

test/vllm/ec2/utils/test_agents.py

Lines changed: 21 additions & 87 deletions

@@ -1,87 +1,21 @@
-# test_vllm_agent.py
-from openai import OpenAI
-from strands import Agent
-from strands_tools import calculator, current_time
-
-from pydantic import BaseModel, Field
-import logging
-
-# Enable logging
-logging.basicConfig(level=logging.INFO)
-
-OPENAI_API_KEY = "EMPTY"
-OPENAI_API_BASE = "http://localhost:8000/v1"
-MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-
-
-class AnalysisResult(BaseModel):
-    """Analysis result structure"""
-
-    summary: str = Field(description="Main summary of the analysis")
-    key_points: list[str] = Field(description="Key points extracted")
-    confidence: float = Field(description="Confidence score (0-1)", ge=0, le=1)
-
-
-def test_direct_completion():
-    client = OpenAI(
-        api_key=OPENAI_API_KEY,
-        base_url=OPENAI_API_BASE,
-    )
-
-    prompt = "What are the main benefits of using VLLM for inference?"
-
-    chat_response = client.completions.create(
-        model=MODEL_NAME,
-        prompt=prompt,
-        temperature=0.7,
-        max_tokens=512,
-    )
-
-    print("\n API Response:")
-    print(chat_response.choices[0].text)
-    return client
-
-
-def main():
-    try:
-        # Test direct API first
-        model_client = test_direct_completion()
-
-        # Create agent with the model
-        agent = Agent(model=model_client, tools=[calculator, current_time])
-
-        print("\nAgent initialized successfully!")
-
-        # Test 1: Basic Agent Interaction
-        print("\nTest 1: Basic Agent Interaction")
-        response = agent("What are the main benefits of using VLLM for inference?")
-        print(f"Agent Response: {response}")
-
-        # Test 2: Tool Usage
-        print("\nTest 2: Tool Usage")
-        tool_response = agent("What's the square root of 144 and what's the current time?")
-        print(f"Tool Response: {tool_response}")
-
-        # Test 3: Structured Output
-        print("\nTest 3: Structured Output")
-        analysis_prompt = """
-        Analyze this technical concept:
-        VLLM is a high-performance library for LLM inference and serving,
-        featuring state-of-the-art scheduling and optimization techniques.
-        """
-
-        result = agent.structured_output(AnalysisResult, analysis_prompt)
-
-        print("Analysis Results:")
-        print(f"Summary: {result.summary}")
-        print(f"Key Points: {result.key_points}")
-        print(f"Confidence: {result.confidence}")
-
-    except Exception as e:
-        print(f"Error occurred: {str(e)}")
-        logging.error(f"Detailed error: {e}", exc_info=True)
-
-
-if __name__ == "__main__":
-    print("Starting VLLM Agent Test...")
-    main()
+from autogen import UserProxyAgent, ConversableAgent
+
+local_llm_config = {
+    "config_list": [
+        {
+            "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", # Same as in vLLM command
+            "api_key": "NotRequired", # Not needed
+            "base_url": "http://localhost:8000/v1", # Your vLLM URL, with '/v1' added
+        }
+    ],
+    "cache_seed": None, # Turns off caching, useful for testing different models
+}
+
+# Create the agent that uses the LLM.
+assistant = ConversableAgent("agent", llm_config=local_llm_config, system_message="")
+
+# Create the agent that represents the user in the conversation.
+user_proxy = UserProxyAgent("user", code_execution_config=False, system_message="")
+
+# Let the assistant start the conversation. It will end when the user types exit.
+assistant.initiate_chat(user_proxy, message="How can I help you today?")
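
As committed, the new test_agents.py opens an interactive session: UserProxyAgent defaults to prompting a human for input, so the script will block until someone types exit. If the intent is an unattended CI check against the same server, a non-interactive variant along these lines might fit better; this is only a sketch against the pinned autogen-agentchat~=0.2 API, and the prompt, the max_turns value, and the reversed chat direction are assumptions rather than part of this commit.

from autogen import ConversableAgent, UserProxyAgent

# Same endpoint configuration as the committed test_agents.py.
local_llm_config = {
    "config_list": [
        {
            "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
            "api_key": "NotRequired",
            "base_url": "http://localhost:8000/v1",
        }
    ],
    "cache_seed": None,
}

assistant = ConversableAgent("agent", llm_config=local_llm_config, system_message="")

# human_input_mode="NEVER" keeps the proxy from waiting on stdin, and max_turns
# below bounds the exchange so the script terminates on its own.
user_proxy = UserProxyAgent("user", code_execution_config=False, human_input_mode="NEVER")

# The proxy asks the question and the LLM-backed agent answers once.
result = user_proxy.initiate_chat(
    assistant,
    message="What are the main benefits of using vLLM for inference?",
    max_turns=1,
)
print(result.summary)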
