Commit 8fc4c14

format logs
1 parent 22ffd7f

2 files changed: +66 −30 lines changed


test/vllm/ec2/utils/run_vllm_on_arm64.sh

Lines changed: 22 additions & 9 deletions
@@ -51,7 +51,8 @@ handle_error() {
 trap cleanup EXIT
 trap 'handle_error $LINENO' ERR
 
-echo "Running initial inference check..."
+echo "####################### RUNNING INFERENCE CHECK ########################################"
+
 docker run --rm \
     -v /fsx/vllm-dlc/vllm:/vllm \
     --entrypoint /bin/bash \
@@ -67,7 +68,8 @@ docker run --rm \
     --tensor-parallel-size 1 \
     --max-model-len 2048"
 
-echo "Starting VLLM server..."
+echo "####################### Starting VLLM server ##########################################"
+
 docker run -d \
     -v /fsx/vllm-dlc/vllm:/vllm \
     --name ${CONTAINER_NAME} \
@@ -89,16 +91,27 @@ docker run -d \
 wait_for_api
 docker logs "${CONTAINER_NAME}"
 
-echo "VLLM server is running and responding to requests!"
+echo "####################### BENCHMARKING ################################################"
+
+python3 /fsx/vllm-dlc/vllm/benchmarks/benchmark_serving.py \
+    --backend vllm \
+    --base-url "http://localhost:8000" \
+    --endpoint '/v1/completions' \
+    --model $MODEL_NAME \
+    --dataset-name random \
+    --random-input-len 128 \
+    --random-output-len 128 \
+    --num-prompts 50 \
+    --max-concurrency 4 \
+    --temperature 0.7
+
+echo "####################### TESTING TOOL CALLS (OPEN AI API) ###########################"
 
-echo "Installing Python dependencies..."
 python -m venv .venv
 source .venv/bin/activate
 
-pip install "openai>=1.0.0" autogen-agentchat~=0.2 autogen ag2[openai] pyautogen
-
-echo "Running agent tests..."
+pip install "openai>=1.0.0"
 python3 test_agents.py
-echo "Testing completed successfully!"
+deactivate
 
-deactivate
+echo "####################### Testing completed successfully ###########################"

test/vllm/ec2/utils/test_agents.py

Lines changed: 44 additions & 21 deletions
@@ -1,21 +1,44 @@
-from autogen import UserProxyAgent, ConversableAgent
-
-local_llm_config = {
-    "config_list": [
-        {
-            "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",  # Same as in vLLM command
-            "api_key": "NotRequired",  # Not needed
-            "base_url": "http://localhost:8000/v1",  # Your vLLM URL, with '/v1' added
-        }
-    ],
-    "cache_seed": None,  # Turns off caching, useful for testing different models
-}
-
-# Create the agent that uses the LLM.
-assistant = ConversableAgent("agent", llm_config=local_llm_config, system_message="")
-
-# Create the agent that represents the user in the conversation.
-user_proxy = UserProxyAgent("user", code_execution_config=False, system_message="")
-
-# Let the assistant start the conversation. It will end when the user types exit.
-assistant.initiate_chat(user_proxy, message="How can I help you today?")
+from openai import OpenAI
+import json
+
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")
+
+
+def get_weather(location: str, unit: str):
+    return f"Getting the weather for {location} in {unit}..."
+
+
+tool_functions = {"get_weather": get_weather}
+
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and state, e.g., 'San Francisco, CA'",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location", "unit"],
+            },
+        },
+    }
+]
+
+response = client.chat.completions.create(
+    model=client.models.list().data[0].id,
+    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
+    tools=tools,
+    tool_choice="auto",
+)
+
+tool_call = response.choices[0].message.tool_calls[0].function
+print(f"Function called: {tool_call.name}")
+print(f"Arguments: {tool_call.arguments}")
+print(f"Result: {tool_functions[tool_call.name](**json.loads(tool_call.arguments))}")
