
Commit 73bbc25

alec-flowers, claude, and ishandhanani authored
feat: trace-replay benchmark with aiperf_args passthrough (#18)
* feat: add aiperf_args passthrough and fix tokenizer for trace-replay

  - Add --tokenizer-trust-remote-code to aiperf calls (fixes Kimi tokenizer)
  - Install tiktoken if missing (required by Kimi's custom tokenizer)
  - Add aiperf_args dict to BenchmarkConfig for passing extra aiperf CLI flags
  - bench.sh accepts extra args after positional params
  - Add production aiperf flags to kimi recipe (duration, timeout, workers, etc)
  - Increase file descriptor limit and add PYTHONUNBUFFERED for real-time logging

* fix: direct warmup artifacts to ARTIFACT_DIR instead of cwd

  The aiperf warmup call was missing --artifact-dir, causing it to write artifacts to the working directory (creating artifacts/ in repo root).

* test: add tests for aiperf_args passthrough in trace-replay

  Tests that key-value args are passed as --key value flags and boolean args are passed as --flag (true) or omitted (false).

* feat: add aiperf_package config for controlling aiperf version

  Add benchmark.aiperf_package field to specify the pip install spec for aiperf (e.g., "aiperf>=0.7.0"). Passed as AIPERF_PACKAGE env var to bench.sh, which does pip install --upgrade. Defaults to "aiperf" if not set. Always installs tiktoken alongside.

  Needed because container-bundled aiperf may predate fixes like trust-remote-code propagation to pool workers (aiperf PR #744).

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: ishandhanani <82981111+ishandhanani@users.noreply.github.com>
1 parent 835ddb6 commit 73bbc25

7 files changed

Lines changed: 147 additions & 11 deletions


src/srtctl/benchmarks/base.py

Lines changed: 14 additions & 1 deletion
@@ -61,7 +61,20 @@ def build_command(
 
 
 class AIPerfBenchmarkRunner(BenchmarkRunner):
-    """Marker base class for AIPerf-driven benchmarks."""
+    """Base class for AIPerf-driven benchmarks.
+
+    Provides shared aiperf_args handling for subclasses.
+    """
+
+    def append_aiperf_args(self, cmd: list[str], config: SrtConfig) -> list[str]:
+        """Append aiperf_args from config as CLI flags."""
+        for key, value in config.benchmark.aiperf_args.items():
+            if isinstance(value, bool):
+                if value:
+                    cmd.append(f"--{key}")
+            else:
+                cmd.extend([f"--{key}", str(value)])
+        return cmd
 
 
 # Registry of benchmark runners
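To make the mapping concrete, here is a minimal sketch of how append_aiperf_args turns a config dict into CLI flags. The SimpleNamespace stub stands in for a real SrtConfig (only benchmark.aiperf_args is read), and the loop body mirrors the method above:

from types import SimpleNamespace

# Stand-in for SrtConfig: append_aiperf_args only reads config.benchmark.aiperf_args.
config = SimpleNamespace(benchmark=SimpleNamespace(aiperf_args={
    "benchmark-duration": 600,  # key/value pair -> "--benchmark-duration 600"
    "export-http-trace": True,  # True bool      -> bare "--export-http-trace"
    "disabled-flag": False,     # False bool     -> omitted entirely
}))

cmd = ["bash", "bench.sh"]
for key, value in config.benchmark.aiperf_args.items():
    if isinstance(value, bool):
        if value:
            cmd.append(f"--{key}")
    else:
        cmd.extend([f"--{key}", str(value)])

print(cmd)
# ['bash', 'bench.sh', '--benchmark-duration', '600', '--export-http-trace']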

src/srtctl/benchmarks/mooncake_router.py

Lines changed: 5 additions & 1 deletion
@@ -101,7 +101,7 @@ def build_command(
         # For HF models, use the model ID directly so transformers downloads it
         tokenizer_path = str(runtime.model_path) if runtime.is_hf_model else "/model"
 
-        return [
+        cmd = [
             "bash",
             self.script_path,
             endpoint,
@@ -111,3 +111,7 @@ def build_command(
             str(itl_threshold),
             tokenizer_path,
         ]
+
+        self.append_aiperf_args(cmd, config)
+
+        return cmd

src/srtctl/benchmarks/scripts/trace-replay/bench.sh

Lines changed: 40 additions & 7 deletions
@@ -6,17 +6,25 @@
 # Replays a user-provided JSONL trace dataset at configurable concurrency levels.
 # Uses aiperf with --custom-dataset-type mooncake_trace.
 #
-# Usage: bench.sh ENDPOINT MODEL_NAME TRACE_FILE CONCURRENCIES [TTFT_THRESHOLD] [ITL_THRESHOLD] [TOKENIZER_PATH]
+# Usage: bench.sh ENDPOINT MODEL_NAME TRACE_FILE CONCURRENCIES [TTFT_THRESHOLD] [ITL_THRESHOLD] [TOKENIZER_PATH] [EXTRA_ARGS]
+#
+# EXTRA_ARGS: JSON-encoded string of additional aiperf flags (passed from Python)
 
 set -e
 
+# Ensure Python output is unbuffered for real-time logging
+export PYTHONUNBUFFERED=1
+
 ENDPOINT=$1
 MODEL_NAME=${2:-"test-model"}
 TRACE_FILE=$3
 CONCURRENCIES=${4:-"1"}
 TTFT_THRESHOLD=${5:-2000}
 ITL_THRESHOLD=${6:-25}
 TOKENIZER_PATH=${7:-"/model"}
+# Remaining args are extra aiperf flags
+shift 7 2>/dev/null || true
+EXTRA_ARGS=("$@")
 
 # Optional: extra Prometheus endpoints for AIPerf server metrics
 SERVER_METRICS_ARGS=()
@@ -32,6 +40,9 @@ BASE_DIR="${BASE_DIR:-/logs}"
 ARTIFACT_DIR="${ARTIFACT_DIR:-${BASE_DIR}/artifacts}"
 mkdir -p "${ARTIFACT_DIR}"
 
+# Increase file descriptor limit for high concurrency
+ulimit -n 600000 2>/dev/null || ulimit -n 65536 2>/dev/null || true
+
 # Increase aiperf HTTP timeout
 export AIPERF_HTTP_SO_RCVTIMEO=120
 
@@ -45,6 +56,9 @@ echo "Concurrencies: ${CONCURRENCIES}"
 echo "TTFT Threshold: ${TTFT_THRESHOLD}ms"
 echo "ITL Threshold: ${ITL_THRESHOLD}ms"
 echo "Tokenizer Path: ${TOKENIZER_PATH}"
+if [ ${#EXTRA_ARGS[@]} -gt 0 ]; then
+    echo "Extra Args: ${EXTRA_ARGS[*]}"
+fi
 echo "=============================================="
 
 # Validate trace file exists
@@ -53,23 +67,40 @@ if [ ! -f "${TRACE_FILE}" ]; then
     exit 1
 fi
 
-# Install aiperf if not present
-if ! command -v aiperf &> /dev/null; then
-    echo "Installing aiperf..."
-    pip install aiperf
+# Create isolated aiperf environment (avoids polluting container packages)
+# AIPERF_PACKAGE env var controls the version (e.g., "aiperf>=0.7.0")
+AIPERF_SPEC="${AIPERF_PACKAGE:-aiperf}"
+AIPERF_VENV="/tmp/aiperf-${SLURM_JOB_ID:-$$}"
+
+echo "Setting up aiperf environment: ${AIPERF_SPEC}"
+
+# Install uv if not in container
+if ! command -v uv &> /dev/null; then
+    echo "Installing uv..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    export PATH="$HOME/.local/bin:$PATH"
 fi
 
+uv venv "${AIPERF_VENV}"
+uv pip install -p "${AIPERF_VENV}" "${AIPERF_SPEC}" tiktoken
+export PATH="${AIPERF_VENV}/bin:${PATH}"
+echo "aiperf $(aiperf --version 2>/dev/null || echo 'installed') in ${AIPERF_VENV}"
+
 # Run small benchmark for warmup
 echo "Running warmup..."
+WARMUP_DIR="${ARTIFACT_DIR}/warmup"
+mkdir -p "${WARMUP_DIR}"
 aiperf profile \
     -m "${MODEL_NAME}" \
     --tokenizer "${TOKENIZER_PATH}" \
+    --tokenizer-trust-remote-code \
     --url "${ENDPOINT}" \
     --streaming \
     --ui simple \
     --extra-inputs ignore_eos:true \
     --concurrency 1 \
-    --request-count 5
+    --request-count 5 \
+    --artifact-dir "${WARMUP_DIR}"
 echo "Warmup complete"
 
 # Setup artifact directory
@@ -92,6 +123,7 @@ for C in "${CONCURRENCY_LIST[@]}"; do
     aiperf profile \
         -m "${MODEL_NAME}" \
        --tokenizer "${TOKENIZER_PATH}" \
+        --tokenizer-trust-remote-code \
         --input-file "${TRACE_FILE}" \
         --custom-dataset-type mooncake_trace \
         --url "${ENDPOINT}" \
@@ -102,7 +134,8 @@
         --ui simple \
         --artifact-dir "${RUN_ARTIFACT_DIR}" \
         "${SERVER_METRICS_ARGS[@]}" \
-        --goodput "time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}"
+        --goodput "time_to_first_token:${TTFT_THRESHOLD} inter_token_latency:${ITL_THRESHOLD}" \
+        "${EXTRA_ARGS[@]}"
 
     echo "$(date '+%Y-%m-%d %H:%M:%S') - Concurrency ${C} complete"
 
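The shift 7 line above defines the calling contract between the Python runners and this script: the first seven argv slots are positional, and everything after them is captured in EXTRA_ARGS and forwarded to aiperf verbatim. A sketch of a command list a runner might build (the endpoint, model name, and extra flag values here are illustrative, not taken from the commit):

# Hypothetical bench.sh command line; slots follow the Usage comment above.
cmd = [
    "bash", "bench.sh",
    "http://localhost:8000",    # $1 ENDPOINT
    "test-model",               # $2 MODEL_NAME
    "/traces/dataset.jsonl",    # $3 TRACE_FILE
    "4",                        # $4 CONCURRENCIES
    "2000",                     # $5 TTFT_THRESHOLD (ms)
    "25",                       # $6 ITL_THRESHOLD (ms)
    "/model",                   # $7 TOKENIZER_PATH
    # Everything below survives the shift 7 and lands in EXTRA_ARGS
    "--benchmark-duration", "600",
    "--workers-max", "200",
]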

src/srtctl/benchmarks/trace_replay.py

Lines changed: 5 additions & 1 deletion
@@ -82,7 +82,7 @@ def build_command(
 
         tokenizer_path = str(runtime.model_path) if runtime.is_hf_model else "/model"
 
-        return [
+        cmd = [
             "bash",
             self.script_path,
             endpoint,
@@ -93,3 +93,7 @@ def build_command(
             str(itl_threshold),
             tokenizer_path,
         ]
+
+        self.append_aiperf_args(cmd, config)
+
+        return cmd

src/srtctl/cli/mixins/benchmark_stage.py

Lines changed: 3 additions & 1 deletion
@@ -285,8 +285,10 @@ def _get_benchmark_env(self, runner: "BenchmarkRunner") -> dict[str, str]:
         env = self._get_benchmark_profiling_env(runner)
         env["SRTCTL_FRONTEND_TYPE"] = self.config.frontend.type
 
-        # Add AIPerf metrics URLs for AIPerf-driven benchmarks
+        # Add AIPerf-specific env vars for AIPerf-driven benchmarks only
         if isinstance(runner, AIPerfBenchmarkRunner):
             env.update(self._get_aiperf_server_metrics_env())
+            if self.config.benchmark.aiperf_package:
+                env["AIPERF_PACKAGE"] = self.config.benchmark.aiperf_package
 
         return env

src/srtctl/core/schema.py

Lines changed: 5 additions & 0 deletions
@@ -546,6 +546,11 @@ class BenchmarkConfig:
     trace_file: str | None = None  # Path to trace JSONL file (container path, e.g., /traces/dataset.jsonl)
     custom_tokenizer: str | None = None  # Custom tokenizer class (e.g., "module.path.ClassName")
     use_chat_template: bool = True  # Pass --use-chat-template to benchmark (default: true)
+    # aiperf pip install spec (e.g., "aiperf>=0.7.0", "aiperf @ git+https://...@commit")
+    # If set, runs pip install <spec> before benchmarking. Upgrades if already installed.
+    aiperf_package: str | None = None
+    # Extra aiperf CLI flags passed through to bench.sh (e.g., benchmark-duration: 600, workers-max: 200)
+    aiperf_args: dict[str, Any] = field(default_factory=dict)
 
     def get_concurrency_list(self) -> list[int]:
         if self.concurrencies is None:
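As a usage sketch, the two new fields combine like this (field names come from the diff above; the flow comments summarize the base.py and bench.sh changes earlier in this commit, and the flag values are illustrative):

from srtctl.core.schema import BenchmarkConfig

# aiperf_package -> AIPERF_PACKAGE env var -> bench.sh installs that spec into a venv
# aiperf_args    -> append_aiperf_args     -> trailing --flags on the bench.sh argv
benchmark = BenchmarkConfig(
    type="trace-replay",
    trace_file="/traces/dataset.jsonl",
    concurrencies=[4],
    aiperf_package="aiperf>=0.7.0",   # pin a spec newer than the container-bundled copy
    aiperf_args={
        "benchmark-duration": 600,    # becomes: --benchmark-duration 600
        "export-http-trace": True,    # becomes: --export-http-trace
    },
)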

tests/test_benchmarks.py

Lines changed: 75 additions & 0 deletions
@@ -322,6 +322,81 @@ def test_build_command_default_thresholds(self):
         assert cmd[6] == "2000"  # default ttft
         assert cmd[7] == "25"  # default itl
 
+    def test_build_command_with_aiperf_args(self):
+        """aiperf_args are passed through as CLI flags."""
+        from unittest.mock import MagicMock
+
+        from srtctl.benchmarks.trace_replay import TraceReplayRunner
+
+        runner = TraceReplayRunner()
+        runtime = MagicMock()
+        runtime.frontend_port = 8000
+        runtime.is_hf_model = False
+
+        from srtctl.core.schema import BenchmarkConfig, ModelConfig, ResourceConfig, SrtConfig
+
+        config = SrtConfig(
+            name="test",
+            model=ModelConfig(path="/model/kimi", container="/image", precision="fp4"),
+            resources=ResourceConfig(gpu_type="gb200"),
+            benchmark=BenchmarkConfig(
+                type="trace-replay",
+                trace_file="/traces/dataset.jsonl",
+                concurrencies=[4],
+                aiperf_args={
+                    "benchmark-duration": 600,
+                    "workers-max": 200,
+                    "request-timeout-seconds": 1200,
+                    "profile-export-level": "raw",
+                },
+            ),
+        )
+
+        cmd = runner.build_command(config, runtime)
+
+        # Positional args come first (9 of them)
+        assert cmd[8] == "/model"  # tokenizer path
+
+        # aiperf_args appended after positional args
+        extra = cmd[9:]
+        assert "--benchmark-duration" in extra
+        assert extra[extra.index("--benchmark-duration") + 1] == "600"
+        assert "--workers-max" in extra
+        assert extra[extra.index("--workers-max") + 1] == "200"
+        assert "--request-timeout-seconds" in extra
+        assert "--profile-export-level" in extra
+        assert extra[extra.index("--profile-export-level") + 1] == "raw"
+
+    def test_build_command_aiperf_args_bool(self):
+        """Boolean aiperf_args are passed as flags without values."""
+        from unittest.mock import MagicMock
+
+        from srtctl.benchmarks.trace_replay import TraceReplayRunner
+
+        runner = TraceReplayRunner()
+        runtime = MagicMock()
+        runtime.frontend_port = 8000
+        runtime.is_hf_model = False
+
+        from srtctl.core.schema import BenchmarkConfig, ModelConfig, ResourceConfig, SrtConfig
+
+        config = SrtConfig(
+            name="test",
+            model=ModelConfig(path="/model/test", container="/image", precision="fp4"),
+            resources=ResourceConfig(gpu_type="gb200"),
+            benchmark=BenchmarkConfig(
+                type="trace-replay",
+                trace_file="/traces/dataset.jsonl",
+                concurrencies=[1],
+                aiperf_args={"export-http-trace": True, "disabled-flag": False},
+            ),
+        )
+
+        cmd = runner.build_command(config, runtime)
+        extra = cmd[9:]
+        assert "--export-http-trace" in extra
+        assert "--disabled-flag" not in extra
+
     def test_config_roundtrip(self):
         """Config with trace-replay loads correctly from YAML."""
         import tempfile
