Commit 4a0ce73

Add comprehensive CI testing framework for markdown-based end-to-end tests
- Implement sequential server testing with complete lifecycle management
- Add markdown parser for extracting tagged bash commands from documentation
- Create server manager with background process handling and port cleanup
- Implement AIPerf manager for test execution and result tracking
- Add test orchestrator for coordinating complete test workflows
- Support health checks with configurable timeouts (15s intervals, 5min max)
- Add comprehensive port and Docker container cleanup (non-critical failures)
- Configure timeouts: 20min server setup, 2min test commands
- Support repository-wide markdown file discovery with smart filtering
- Remove emojis for professional CI output
- Add detailed logging and real-time command output streaming
1 parent 0637181 commit 4a0ce73
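The commit message mentions a markdown parser and the `MarkdownCommand` objects that the AIPerf manager further down consumes, but the parser itself is not included in this excerpt. As a rough, hypothetical sketch of how the tagged bash blocks added to docs/tutorial.md (for example `<!-- aiperf-setup -->` ... `<!-- /aiperf-setup -->`) could be extracted, here is a minimal version; only the `tag_name`, `file_path`, and `start_line` fields (which `AIPerfManager` reads) come from the diff, and everything else, including the `script` field and function names, is assumed:

```python
import re
from dataclasses import dataclass
from pathlib import Path
from typing import List

FENCE = "`" * 3  # code-fence marker, built here so it does not break this listing


@dataclass
class MarkdownCommand:
    """One tagged bash block pulled out of a markdown file (sketch of the type used below)."""
    tag_name: str    # e.g. "aiperf-setup" or "setup-dynamo-default-openai-endpoint-server"
    file_path: str   # markdown file the block came from
    start_line: int  # 1-based line of the opening tag
    script: str      # bash commands inside the fenced block (assumed field)


# Matches:  <!-- tag -->  ```bash ... ```  <!-- /tag -->
_TAG_BLOCK = re.compile(
    rf"<!--\s*(?P<tag>[\w-]+)\s*-->\s*{FENCE}bash\n(?P<body>.*?){FENCE}\s*<!--\s*/(?P=tag)\s*-->",
    re.DOTALL,
)


def extract_tagged_commands(md_file: Path) -> List[MarkdownCommand]:
    """Return every tagged bash block found in one markdown file."""
    text = md_file.read_text()
    commands = []
    for match in _TAG_BLOCK.finditer(text):
        commands.append(
            MarkdownCommand(
                tag_name=match.group("tag"),
                file_path=str(md_file),
                start_line=text[: match.start()].count("\n") + 1,
                script=match.group("body"),
            )
        )
    return commands


if __name__ == "__main__":
    # List the tagged blocks in the tutorial updated by this commit.
    for cmd in extract_tagged_commands(Path("docs/tutorial.md")):
        print(f"{cmd.file_path}:{cmd.start_line}  {cmd.tag_name}")
```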

8 files changed (+1799 −43 lines)
docs/tutorial.md

Lines changed: 74 additions & 43 deletions
@@ -26,11 +26,25 @@ models using various inference solutions.
 
 </br>
 
+<!-- aiperf-setup -->
+```bash
+# Install and set up aiperf
+# Create and activate virtual environment
+python3 -m venv .venv
+source .venv/bin/activate
+
+# Install aiperf from GitHub
+pip install git+https://github.com/ai-dynamo/aiperf.git
+
+```
+<!-- /aiperf-setup -->
+
 ## Profile Qwen3-0.6B using Dynamo <a id="dynamo-qwen3-0.6B">
 
 > [!NOTE]
 > The most up to date installation instructions for Dynamo are available on [Github](https://github.com/ai-dynamo/dynamo?tab=readme-ov-file#1-initial-setup)
 
+<!-- setup-dynamo-default-openai-endpoint-server -->
 ```bash
 # set environment variables
 export AIPERF_REPO_TAG="main"
@@ -54,76 +68,93 @@ docker run \
 --gpus all \
 --network host \
 ${DYNAMO_PREBUILT_IMAGE_TAG} \
-/bin/bash -c "python3 -m dynamo.frontend & python3 -m dynamo.vllm --model ${MODEL} --enforce-eager --no-enable-prefix-caching" > server.log 2>&1 &
-
-# Set up AIPerf
-docker run \
--it \
---rm \
---gpus all \
---network host \
--e AIPERF_REPO_TAG=${AIPERF_REPO_TAG} \
--e MODEL=${MODEL} \
-ubuntu:24.04
-
-apt update && apt install -y curl git
-
-curl -LsSf https://astral.sh/uv/install.sh | sh
-
-source $HOME/.local/bin/env
-
-uv venv --python 3.10
-
-source .venv/bin/activate
-
-git clone -b ${AIPERF_REPO_TAG} --depth 1 https://github.com/ai-dynamo/aiperf.git
+/bin/bash -c "python3 -m dynamo.frontend & python3 -m dynamo.vllm --model ${MODEL} --enforce-eager --no-enable-prefix-caching"
+```
+<!-- /setup-dynamo-default-openai-endpoint-server -->
 
-uv pip install ./aiperf
 
-# At this point, Dynamo may not be ready.
-# The following command will return when Dynamo is ready for requests.
-while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8080/v1/chat/completions -H 'Content-Type: application/json' -d '{"model":"'"${MODEL}"'","messages":[{"role":"user","content":"a"}],"max_completion_tokens":1}')" != "200" ]; do sleep 1; done
+<!-- health-check-dynamo-default-openai-endpoint-server -->
+```bash
+# At this point, Dynamo server may not be ready.
+# The following command will return when Dynamo server is ready for requests.
+# Try for 5 minutes (20 attempts at 15-second intervals)
+attempt=0
+while [ $attempt -lt 20 ] && [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8080/v1/chat/completions -H 'Content-Type: application/json' -d '{"model":"'"${MODEL}"'","messages":[{"role":"user","content":"a"}],"max_completion_tokens":1}')" != "200" ]; do
+  echo "Waiting for Dynamo server to be ready..."
+  sleep 15
+  attempt=$((attempt + 1))
+done
+```
+<!-- /health-check-dynamo-default-openai-endpoint-server -->
 
 # Profile the model
+
+<!-- aiperf-run-dynamo-default-openai-endpoint-server -->
+```bash
 aiperf profile \
 --model Qwen/Qwen3-0.6B \
 --endpoint-type chat \
 --endpoint /v1/chat/completions \
 --streaming \
 --url localhost:8080 \
---synthetic-input-tokens-mean 1000 \
+--synthetic-input-tokens-mean 10 \
 --synthetic-input-tokens-stddev 0 \
---output-tokens-mean 2000 \
+--output-tokens-mean 20 \
 --output-tokens-stddev 0 \
---extra-inputs min_tokens:2000 \
+--extra-inputs min_tokens:2 \
 --extra-inputs ignore_eos:true \
---concurrency 2048 \
---request-count 6144 \
---warmup-request-count 1000 \
+--concurrency 2 \
+--request-count 32 \
+--warmup-request-count 2 \
 --conversation-num 8000 \
---random-seed 100 \
+--random-seed 8 \
 -v \
 -H 'Authorization: Bearer NOT USED' \
 -H 'Accept: text/event-stream'
 ```
+<!-- /aiperf-run-dynamo-default-openai-endpoint-server -->
 
 ## Profile Qwen3-0.6B using vllm <a id="vllm-qwen3-0.6B">
+
+<!-- setup-vllm-default-openai-endpoint-server -->
 ```bash
-# Install vLLM from pip:
-pip install vllm
+# Pull and run the vLLM OpenAI-compatible server container
+docker pull vllm/vllm-openai:latest
+docker run --gpus all -p 8000:8000 vllm/vllm-openai:latest \
+--model Qwen/Qwen3-0.6B \
+--host 0.0.0.0 --port 8000
+```
+<!-- /setup-vllm-default-openai-endpoint-server -->
 
-# Load and run the model:
-vllm serve "Qwen/Qwen3-0.6B"
+<!-- health-check-vllm-default-openai-endpoint-server -->
+```bash
+# At this point, vLLM server may not be ready.
+# The following command will return when vLLM server is ready for requests.
+
+MODEL="Qwen/Qwen3-0.6B"
+# Try for 5 minutes (20 attempts at 15-second intervals)
+attempt=0
+while [ $attempt -lt 20 ] && [ "$(curl -s -o /dev/null -w '%{http_code}' \
+  http://localhost:8000/v1/chat/completions \
+  -H 'Content-Type: application/json' \
+  -d '{"model":"'"${MODEL}"'","messages":[{"role":"user","content":"ping"}],"max_completion_tokens":1}')" != "200" ]; do
+  echo "Waiting for vLLM server to be ready..."
+  sleep 15
+  attempt=$((attempt + 1))
+done
+```
+<!-- /health-check-vllm-default-openai-endpoint-server -->
 
-uv venv
-source .venv/bin/activate
-pip install git+https://github.com/ai-dynamo/aiperf.git
 
+<!-- aiperf-run-vllm-default-openai-endpoint-server -->
+```bash
 aiperf profile \
 --model Qwen/Qwen3-0.6B \
+--url localhost:8000 \
 --endpoint-type chat \
 --endpoint /v1/chat/completions \
 --streaming \
---request-rate 1000 \
---request-count 6500
+--request-rate 2 \
+--request-count 8
 ```
+<!-- /aiperf-run-vllm-default-openai-endpoint-server -->
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+# Test framework for markdown-based CI testing
Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""
+AIPerf Manager - Handles AIPerf setup and test execution
+"""
+
+import logging
+from typing import List, Dict, Optional
+from dataclasses import dataclass
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class AIPerfTestResult:
+    """Result of an AIPerf test run"""
+    server_id: str
+    command: 'MarkdownCommand'
+    success: bool
+    execution_time: float
+    error_message: Optional[str] = None
+
+class AIPerfManager:
+    """Manages AIPerf setup and test execution"""
+
+    def __init__(self, server_manager):
+        self.server_manager = server_manager
+        self.setup_completed = False
+        self.setup_command = None
+        self.test_results: List[AIPerfTestResult] = []
+
+    def discover_aiperf_commands(self, commands: List['MarkdownCommand']) -> None:
+        """Discover AIPerf setup and run commands"""
+        logger.info("Discovering AIPerf commands...")
+
+        for cmd in commands:
+            if cmd.tag_name == 'aiperf-setup':
+                self.setup_command = cmd
+                logger.info(f"Found AIPerf setup command: {cmd.file_path}:{cmd.start_line}")
+
+        if not self.setup_command:
+            logger.warning("No AIPerf setup command found")
+
+    def setup_aiperf(self) -> bool:
+        """Setup AIPerf (run only once)"""
+        if self.setup_completed:
+            logger.info("AIPerf already set up, skipping...")
+            return True
+
+        if not self.setup_command:
+            logger.error("No AIPerf setup command available")
+            return False
+
+        logger.info("Setting up AIPerf...")
+        success = self.server_manager._execute_command(self.setup_command, timeout=120)  # 2 minute timeout for setup
+
+        if success:
+            self.setup_completed = True
+            logger.info("AIPerf setup completed successfully")
+        else:
+            logger.error("AIPerf setup failed")
+
+        return success
+
+    def run_tests_for_server(self, server_id: str) -> List[AIPerfTestResult]:
+        """Run all AIPerf tests for a specific server"""
+        if not self.setup_completed:
+            logger.error("AIPerf not set up yet")
+            return []
+
+        if server_id not in self.server_manager.servers:
+            logger.error(f"Server {server_id} not found")
+            return []
+
+        server = self.server_manager.servers[server_id]
+        if not server.aiperf_run_commands:
+            logger.info(f"No AIPerf run commands found for server {server_id}")
+            return []
+
+        logger.info(f"Running {len(server.aiperf_run_commands)} AIPerf tests for server {server_id}")
+
+        results = []
+        for i, cmd in enumerate(server.aiperf_run_commands, 1):
+            logger.info(f"Running AIPerf test {i}/{len(server.aiperf_run_commands)} for {server_id}")
+
+            import time
+            start_time = time.time()
+            success = self.server_manager._execute_command(cmd, timeout=120)  # 2 minute timeout for tests
+            execution_time = time.time() - start_time
+
+            result = AIPerfTestResult(
+                server_id=server_id,
+                command=cmd,
+                success=success,
+                execution_time=execution_time,
+                error_message=None if success else f"Test failed for {cmd.tag_name}"
+            )
+            results.append(result)
+            self.test_results.append(result)
+
+            if success:
+                logger.info(f"AIPerf test {i} completed successfully in {execution_time:.2f}s")
+            else:
+                logger.error(f"AIPerf test {i} failed after {execution_time:.2f}s")
+
+        return results
+
+    def run_all_tests(self) -> Dict[str, List[AIPerfTestResult]]:
+        """Run all AIPerf tests for all servers"""
+        if not self.setup_completed:
+            logger.error("AIPerf not set up yet")
+            return {}
+
+        all_results = {}
+
+        for server_id, server in self.server_manager.servers.items():
+            if server.aiperf_run_commands:
+                logger.info(f"Running tests for server: {server_id}")
+                results = self.run_tests_for_server(server_id)
+                all_results[server_id] = results
+            else:
+                logger.info(f"No tests to run for server: {server_id}")
+                all_results[server_id] = []
+
+        return all_results
+
+    def log_test_summary(self):
+        """Log summary of all test results"""
+        logger.info("="*80)
+        logger.info("AIPERF TEST RESULTS SUMMARY")
+        logger.info("="*80)
+
+        total_tests = len(self.test_results)
+        successful_tests = sum(1 for result in self.test_results if result.success)
+        failed_tests = total_tests - successful_tests
+
+        logger.info(f"Total Tests: {total_tests}")
+        logger.info(f"Successful: {successful_tests}")
+        logger.info(f"Failed: {failed_tests}")
+        logger.info(f"Success Rate: {(successful_tests/total_tests*100):.1f}%" if total_tests > 0 else "N/A")
+
+        # Group by server
+        server_results = {}
+        for result in self.test_results:
+            if result.server_id not in server_results:
+                server_results[result.server_id] = []
+            server_results[result.server_id].append(result)
+
+        for server_id, results in server_results.items():
+            logger.info(f"\nServer: {server_id}")
+            for i, result in enumerate(results, 1):
+                status = "PASS" if result.success else "FAIL"
+                logger.info(f"  Test {i}: {status} ({result.execution_time:.2f}s) - {result.command.tag_name}")
+                if not result.success and result.error_message:
+                    logger.info(f"    Error: {result.error_message}")
+
+        logger.info("="*80)
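For orientation, a hypothetical driver showing how the `AIPerfManager` above could be exercised end to end. The commit's real `ServerManager` and orchestrator are not part of this excerpt, so `StubServerManager` is a stand-in invented here that exposes only the two members `AIPerfManager` touches (`servers` and `_execute_command`); `extract_tagged_commands` is the parser sketched near the top of this page:

```python
import logging
import types
from pathlib import Path

logging.basicConfig(level=logging.INFO)


class StubServerManager:
    """Stand-in for the commit's ServerManager; not the committed implementation."""

    def __init__(self):
        self.servers = {}  # server_id -> object exposing .aiperf_run_commands

    def _execute_command(self, cmd, timeout):
        # The real server manager would run the bash block, stream its output,
        # and enforce the timeout; here we just report what would run.
        print(f"would run [{cmd.tag_name}] from {cmd.file_path}:{cmd.start_line} (timeout={timeout}s)")
        return True


manager = StubServerManager()
aiperf = AIPerfManager(manager)

# Commands come from the markdown parser sketch; docs/tutorial.md is the file
# this commit tags with aiperf-setup / setup-* / health-check-* / aiperf-run-* blocks.
commands = extract_tagged_commands(Path("docs/tutorial.md"))
aiperf.discover_aiperf_commands(commands)

if aiperf.setup_aiperf():
    # Register one server whose tests are the aiperf-run-* blocks from the tutorial.
    manager.servers["dynamo-default-openai-endpoint-server"] = types.SimpleNamespace(
        aiperf_run_commands=[c for c in commands if c.tag_name.startswith("aiperf-run-")]
    )
    aiperf.run_all_tests()
    aiperf.log_test_summary()
```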
