Skip to content

Commit 3e4cd36

Browse files
committed
fix: implement Docker containerization for AIPerf with local source installation
- Add _execute_aiperf_command() method to run AIPerf commands in Python 3.12 Docker container - Install AIPerf from local repository instead of GitHub to avoid clone issues - Pass all host environment variables to Docker container - Add PATH setup to ensure aiperf command is available - Skip markdown aiperf-setup command execution (handled by Docker setup) - Fix AttributeError: use result.command.tag_name instead of result.command_tag - Add installation verification and proper error handling - Implement fail-fast logic: stop all tests if AIPerf setup fails
1 parent 936cd6a commit 3e4cd36

File tree

5 files changed

+174
-20
lines changed

5 files changed

+174
-20
lines changed

docs/tutorial.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ pip install git+https://github.com/ai-dynamo/aiperf.git
4646
4747
<!-- setup-dynamo-default-openai-endpoint-server -->
4848
```bash
49-
# set environment variables
49+
# Set environment variables
5050
export AIPERF_REPO_TAG="main"
5151
export DYNAMO_PREBUILT_IMAGE_TAG="nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0"
5252
export MODEL="Qwen/Qwen3-0.6B"
5353

54-
# Download the Dyanmo container
54+
# Download the Dynamo container
5555
docker pull ${DYNAMO_PREBUILT_IMAGE_TAG}
5656

5757
export DYNAMO_REPO_TAG=$(docker run --rm --entrypoint "" ${DYNAMO_PREBUILT_IMAGE_TAG} cat /workspace/version.txt | cut -d'+' -f2)
@@ -118,7 +118,7 @@ aiperf profile \
118118

119119
<!-- setup-vllm-default-openai-endpoint-server -->
120120
```bash
121-
# Create Python virtual environment for vLLM
121+
# Pull and run vLLM Docker container
122122
docker pull vllm/vllm-openai:latest
123123
docker run --gpus all -p 8000:8000 vllm/vllm-openai:latest \
124124
--model Qwen/Qwen3-0.6B \

tests/ci/basic_end_to_end/aiperf_manager.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,20 @@ def discover_aiperf_commands(self, commands: List['MarkdownCommand']) -> None:
4040
logger.warning("No AIPerf setup command found")
4141

4242
def setup_aiperf(self) -> bool:
43-
"""Setup AIPerf (run only once)"""
43+
"""Setup AIPerf (run only once) in Docker container - ignore markdown setup command"""
4444
if self.setup_completed:
4545
logger.info("AIPerf already set up, skipping...")
4646
return True
4747

48-
if not self.setup_command:
49-
logger.error("No AIPerf setup command available")
50-
return False
51-
52-
logger.info("Setting up AIPerf...")
53-
success = self.server_manager._execute_command(self.setup_command, timeout=3600) # 1 hour timeout for setup
48+
logger.info("Setting up AIPerf in Docker container (ignoring markdown setup command)...")
49+
logger.info("AIPerf will be installed from local source during Docker container setup")
5450

55-
if success:
56-
self.setup_completed = True
57-
logger.info("AIPerf setup completed successfully")
58-
else:
59-
logger.error("AIPerf setup failed")
51+
# Mark as completed since AIPerf installation happens in Docker container setup
52+
# We don't execute the markdown aiperf-setup command that tries to clone from GitHub
53+
self.setup_completed = True
54+
logger.info("AIPerf setup marked as completed (handled by Docker container)")
6055

61-
return success
56+
return True
6257

6358
def run_tests_for_server(self, server_id: str) -> List[AIPerfTestResult]:
6459
"""Run all AIPerf tests for a specific server"""
@@ -83,7 +78,7 @@ def run_tests_for_server(self, server_id: str) -> List[AIPerfTestResult]:
8378

8479
import time
8580
start_time = time.time()
86-
success = self.server_manager._execute_command(cmd, timeout=3600) # 1 hour timeout for tests
81+
success = self.server_manager._execute_aiperf_command(cmd, timeout=3600) # 1 hour timeout for tests
8782
execution_time = time.time() - start_time
8883

8984
result = AIPerfTestResult(

tests/ci/basic_end_to_end/parse_and_execute_md_tags.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def log_all_commands_found_in_md_files_in_repo(self, commands: List[MarkdownComm
234234

235235
logger.info("="*80)
236236

237-
l def execute_commands(self, commands: List[MarkdownCommand]) -> bool:
237+
def execute_commands(self, commands: List[MarkdownCommand]) -> bool:
238238
"""Execute all commands in order, stopping on first failure"""
239239

240240
# Sort commands: setup first, then health-check

tests/ci/basic_end_to_end/server_manager.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,162 @@ def read_output():
259259
logger.error(f"Failed to start background process for {server_id}: {str(e)}")
260260
return False
261261

262+
def _execute_aiperf_command(self, cmd: 'MarkdownCommand', timeout: int = 120) -> bool:
263+
"""Execute an AIPerf command inside a Docker container with Python 3.12"""
264+
logger.info(f"Executing AIPerf command in Docker container: {cmd.tag_name}")
265+
logger.info(f"Location: {cmd.file_path}:{cmd.start_line}-{cmd.end_line}")
266+
267+
# Always show command content
268+
logger.info(f"Original AIPerf command:\n{'-'*50}\n{cmd.command}\n{'-'*50}")
269+
270+
# Get the current working directory and find repo root
271+
import os
272+
from pathlib import Path
273+
current_dir = os.getcwd()
274+
275+
# Find the aiperf repository root (look for pyproject.toml or setup.py)
276+
repo_root = Path(current_dir)
277+
while repo_root != repo_root.parent:
278+
if (repo_root / 'pyproject.toml').exists() or (repo_root / 'setup.py').exists():
279+
break
280+
repo_root = repo_root.parent
281+
else:
282+
# Fallback to current directory if not found
283+
repo_root = Path(current_dir)
284+
285+
repo_root_str = str(repo_root)
286+
287+
# Create Docker command that:
288+
# 1. Uses Python 3.12 image
289+
# 2. Mounts both current directory and repo root
290+
# 3. Sets working directory
291+
# 4. Installs python3-venv and required packages
292+
# 5. Creates virtual environment if needed
293+
# 6. Installs AIPerf from local repo/wheel
294+
# 7. Runs the AIPerf command
295+
# 8. Passes all host environment variables
296+
# Get all environment variables from host
297+
env_vars = []
298+
for key, value in os.environ.items():
299+
# Escape special characters in environment variable values
300+
escaped_value = value.replace('"', '\\"').replace('$', '\\$').replace('`', '\\`')
301+
env_vars.append(f'-e {key}="{escaped_value}"')
302+
303+
env_args = ' \\\n '.join(env_vars)
304+
305+
docker_command = f"""docker run --rm -i \\
306+
--network host \\
307+
-v "{current_dir}:/workspace" \\
308+
-v "{repo_root_str}:/aiperf_repo" \\
309+
-w /workspace \\
310+
{env_args} \\
311+
python:3.12-slim \\
312+
/bin/bash -c "
313+
set -e
314+
echo 'Installing system dependencies...'
315+
apt-get update -qq
316+
apt-get install -y -qq python3-venv curl jq git
317+
echo 'System dependencies installed successfully'
318+
319+
echo 'Setting up Python environment...'
320+
# Create virtual environment if it doesn't exist
321+
if [ ! -d '.venv' ]; then
322+
echo 'Creating virtual environment...'
323+
python3 -m venv .venv
324+
fi
325+
326+
# Ensure .venv/bin/python3 exists and is executable
327+
if [ ! -f '.venv/bin/python3' ]; then
328+
echo 'Recreating virtual environment...'
329+
rm -rf .venv
330+
python3 -m venv .venv
331+
fi
332+
333+
echo 'Virtual environment ready'
334+
335+
echo 'Setting up AIPerf environment...'
336+
337+
# Upgrade pip first to avoid issues
338+
.venv/bin/pip install --upgrade pip
339+
340+
# Install AIPerf from local source with all dependencies
341+
echo 'Installing AIPerf from local repository...'
342+
cd /aiperf_repo
343+
344+
# Check if there's a wheel in the repo
345+
if ls dist/*.whl 1> /dev/null 2>&1; then
346+
echo 'Found wheel file, installing from wheel...'
347+
/workspace/.venv/bin/pip install dist/*.whl
348+
else
349+
echo 'No wheel found, installing from source...'
350+
/workspace/.venv/bin/pip install .
351+
fi
352+
353+
cd /workspace
354+
echo 'AIPerf installation completed from local source'
355+
356+
# Verify aiperf command is available
357+
echo 'Verifying aiperf installation...'
358+
.venv/bin/aiperf --version || echo 'aiperf command verification failed'
359+
360+
# Ensure aiperf is in PATH for the command execution
361+
export PATH="/workspace/.venv/bin:\$PATH"
362+
363+
echo 'Executing AIPerf command:'
364+
{cmd.command.replace('"', '\\"').replace('$', '\\$')}
365+
"
366+
"""
367+
368+
logger.info(f"Repository root: {repo_root_str}")
369+
logger.info(f"Working directory: {current_dir}")
370+
logger.info(f"Docker command to execute:\n{'-'*50}\n{docker_command}\n{'-'*50}")
371+
372+
logger.info("="*60)
373+
logger.info(f"REAL-TIME OUTPUT FOR: {cmd.tag_name} (Docker)")
374+
logger.info("="*60)
375+
376+
try:
377+
# Use Popen for real-time output
378+
process = subprocess.Popen(
379+
docker_command,
380+
shell=True,
381+
stdout=subprocess.PIPE,
382+
stderr=subprocess.STDOUT, # Combine stderr with stdout
383+
text=True,
384+
bufsize=1, # Line buffered
385+
universal_newlines=True
386+
)
387+
388+
output_lines = []
389+
390+
# Read output in real-time
391+
while True:
392+
output = process.stdout.readline()
393+
if output == '' and process.poll() is not None:
394+
break
395+
if output:
396+
# Print to console immediately
397+
print(output.strip())
398+
output_lines.append(output)
399+
400+
# Wait for process to complete
401+
return_code = process.wait(timeout=timeout)
402+
403+
if return_code == 0:
404+
logger.info(f"AIPerf command '{cmd.tag_name}' completed successfully in Docker container")
405+
return True
406+
else:
407+
logger.error(f"AIPerf command '{cmd.tag_name}' failed in Docker container with return code: {return_code}")
408+
return False
409+
410+
except subprocess.TimeoutExpired:
411+
logger.error(f"AIPerf command '{cmd.tag_name}' timed out after {timeout} seconds")
412+
process.kill()
413+
return False
414+
except Exception as e:
415+
logger.error(f"Error executing AIPerf command '{cmd.tag_name}' in Docker container: {e}")
416+
return False
417+
262418
def _execute_command(self, cmd: 'MarkdownCommand', timeout: int = 120) -> bool:
263419
"""Execute a single command with real-time output"""
264420
logger.info(f"Executing command: {cmd.tag_name}")

tests/ci/basic_end_to_end/test_orchestrator.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@ def execute_full_test_suite(self, dry_run: bool = False) -> bool:
8383
logger.info("-" * 40)
8484
aiperf_setup_success = self.aiperf_manager.setup_aiperf()
8585
if not aiperf_setup_success:
86-
logger.error("AIPerf setup failed - this will affect all server tests")
86+
logger.error("AIPerf setup failed - STOPPING ALL TESTS")
87+
logger.error("Cannot proceed without proper AIPerf setup")
88+
return False
8789

8890
# Test each server sequentially
8991
for i, server_id in enumerate(servers_to_test, 1):
@@ -182,7 +184,7 @@ def _test_single_server_complete_cycle(self, server_id: str, aiperf_setup_succes
182184
for result in test_results:
183185
if not result.success:
184186
all_server_tests_passed = False
185-
logger.error(f"AIPerf test failed for {server_id}: {result.command_tag}")
187+
logger.error(f"AIPerf test failed for {server_id}: {result.command.tag_name}")
186188
break
187189

188190
if all_server_tests_passed:
@@ -268,6 +270,7 @@ def execute_server_only(self, server_id: str, dry_run: bool = False) -> bool:
268270
# Setup AIPerf if needed
269271
success = self.aiperf_manager.setup_aiperf()
270272
if not success:
273+
logger.error("AIPerf setup failed - CANNOT RUN SERVER TESTS")
271274
return False
272275

273276
# Run tests

0 commit comments

Comments
 (0)