Commit 99671c0

add test_llm_d_inference_sim end-to-end testing
run with the same `pdm run test:e2e`. this requires `llm-d-inference-sim` to be present in the local environment; see the module docstring for more information. the `e2e_test-on-change.yml` workflow has been updated to run on all `push` and `pull_request` events, not just those targeting the `main` and `feature/**` branches.
1 parent 28d4b2b · commit 99671c0

File tree: 8 files changed, +313 −21 lines

.github/workflows/e2e_test-on-change.yml

Lines changed: 10 additions & 20 deletions

@@ -2,34 +2,24 @@ name: E2E Test on change
 
 on:
   push:
-    branches:
-      - main
-      - 'feature/**'
   pull_request:
-    branches:
-      - main
-      - 'feature/**'
 
 jobs:
   e2e-tests:
     runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ['3.13']
     steps:
-      - name: Checkout Code
+      - name: Checkout code
         uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Set up PDM
-        uses: pdm-project/setup-pdm@v4
+
+      - name: Install Nix
+        uses: cachix/install-nix-action@v31
         with:
-          python-version: ${{ matrix.python-version }}
+          github_access_token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Install dependencies
         run: |
-          pdm sync -d
-      - name: Run e2e tests
+          nix develop -c pdm sync -d
+
+      - name: Run end-to-end tests
         run: |
-          pdm run test:e2e
+          nix develop -c pdm run test:e2e

e2e/testdata/models/.gitignore

Lines changed: 4 additions & 0 deletions

*
!.gitignore
!*.tar.gz
!*.tar.zst
e2e/testdata/models/google_gemma-3-270m.tar.gz

5.38 MB · Binary file not shown.
e2e/test_llm_d_inference_sim.py

Lines changed: 119 additions & 0 deletions

"""
End-to-end integration testing of inference-perf using llm-d-inference-sim[1].

In order for these tests to run, you must have `llm-d-inference-sim` in your
PATH. The GitHub Actions runner will have this, but you may also install it
locally by following llm-d-inference-sim's README or by entering the Nix shell
of this repository (i.e. `nix develop`).

If your local environment is missing `llm-d-inference-sim`, tests here will
automatically be skipped.

[1]: https://github.com/llm-d/llm-d-inference-sim
"""

import pytest

from utils.llm_d_inference_sim import LLMDInferenceSimRunner
from utils.benchmark import run_benchmark_minimal
from utils.testdata import extract_tarball


TEST_SIM_PORT = 18000
TEST_MODEL_NAME = "google/gemma-3-270m"
TEST_MODEL_TARBALL = "e2e/testdata/models/google_gemma-3-270m.tar.gz"


@pytest.mark.asyncio
@pytest.mark.skipif(not LLMDInferenceSimRunner.is_available(), reason="local environment missing llm-d-inference-sim")
@pytest.mark.parametrize(
    "data",
    [
        pytest.param(
            {
                "type": "mock",
            },
            id="data_mock",
        ),
        pytest.param(
            {
                "type": "shared_prefix",
                "shared_prefix": {
                    "num_groups": 256,
                    "num_prompts_per_group": 16,
                    "system_prompt_len": 512,
                    "question_len": 256,
                    "output_len": 256,
                },
            },
            id="data_shared_prefix",
        ),
    ],
)
@pytest.mark.parametrize(
    "load",
    [
        pytest.param(
            {
                "type": "constant",
                "stages": [{"rate": 1, "duration": 5}],
                "num_workers": 2,
            },
            id="load_constant_slow",
        ),
        pytest.param(
            {
                "type": "constant",
                "interval": 2,
                "stages": [{"rate": 1, "duration": 5}, {"rate": 2, "duration": 5}],
                "num_workers": 2,
            },
            id="load_constant_slow_two_stages",
        ),
        pytest.param(
            {
                "type": "constant",
                "stages": [{"rate": 100, "duration": 5}],
                "num_workers": 2,
            },
            id="load_constant_fast",
        ),
    ],
)
async def test_completion_successful_run(data: dict, load: dict):
    """
    Very simple inference-perf integration test that ensures a wide range of
    vLLM benchmarking configurations can run successfully.
    """
    config = {
        "data": data,
        "load": load,
        "api": {
            "type": "completion",
            "streaming": True,
        },
        "server": {
            "type": "vllm",
            "model_name": TEST_MODEL_NAME,
            "base_url": f"http://127.0.0.1:{TEST_SIM_PORT}",
            "ignore_eos": True,
        },
        "tokenizer": {
            "pretrained_model_name_or_path": str(extract_tarball(TEST_MODEL_TARBALL)),
        },
        "report": {
            "request_lifecycle": {
                "summary": True,
                "per_stage": True,
                "per_request": True,
            },
        },
    }

    async with LLMDInferenceSimRunner(TEST_MODEL_NAME, port=TEST_SIM_PORT):
        result = await run_benchmark_minimal(config)

    assert result.success, "Benchmark failed"
    assert result.reports, "No reports generated from benchmark"
    assert result.reports["summary_lifecycle_metrics.json"], "Missing summary report"
    assert result.reports["per_request_lifecycle_metrics.json"], "Missing requests report"
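The stacked `parametrize` decorators cross the two `data` cases with the three `load` cases, so the single test function expands into six test runs. A minimal sketch of that expansion (illustrative only; the IDs are the `pytest.param` IDs above, and this helper is not part of the commit):

```python
import itertools

# IDs from the pytest.param(...) declarations in the test above.
data_ids = ["data_mock", "data_shared_prefix"]
load_ids = ["load_constant_slow", "load_constant_slow_two_stages", "load_constant_fast"]

# Stacked @pytest.mark.parametrize decorators behave like a cross product:
# one generated test per (data, load) pair, six in total.
for data_id, load_id in itertools.product(data_ids, load_ids):
    print(data_id, load_id)
```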

e2e/utils/llm_d_inference_sim.py

Lines changed: 139 additions & 0 deletions

import aiohttp
import asyncio
import logging
import sys
import textwrap
import shutil
from contextlib import AsyncContextDecorator


logger = logging.getLogger(__name__)


class LLMDInferenceSimRunner(AsyncContextDecorator):
    @staticmethod
    def is_available(executable: str = "llm-d-inference-sim") -> bool:
        """
        Returns whether llm-d-inference-sim is present in the local
        environment.
        """
        return shutil.which(executable) is not None

    executable: str
    argv: list[str]

    _port: int
    _proc: asyncio.subprocess.Process | None = None
    _wait_until_ready: bool

    def __init__(
        self,
        model: str,
        *cmd_args: str,
        port: int = 8000,
        max_waiting_queue_length: int = 10000,
        executable: str = "llm-d-inference-sim",
        wait_until_ready=True,
    ) -> None:
        self.executable = executable
        self.argv = [
            *("--port", str(port)),
            *("--model", model),
            *("--max-waiting-queue-length", str(max_waiting_queue_length)),
            *cmd_args,
        ]
        self._port = port
        self._wait_until_ready = wait_until_ready

    async def __aenter__(self) -> "LLMDInferenceSimRunner":
        """
        Starts running the llm-d-inference-sim server in the background.
        Once the context manager exits, the server is stopped with a SIGTERM.
        """
        if not LLMDInferenceSimRunner.is_available(self.executable):
            raise FileNotFoundError(f"executable not found: {self.executable}")

        logger.debug(f"starting server: {self.argv=}")
        self._proc = await asyncio.create_subprocess_exec(
            self.executable,
            *self.argv,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )

        if self._wait_until_ready:
            try:
                await self.wait_until_ready()
            except Exception:
                await self.__aexit__(*sys.exc_info())
                raise

        return self

    async def __aexit__(self, *exc):
        """
        Sends a SIGTERM to the server and waits briefly for it to stop,
        escalating to a SIGKILL if it is still running.
        """
        terminate_task = asyncio.create_task(self._terminate())
        await self._wait()
        await terminate_task

    async def wait_until_ready(
        self,
        polling_sec: float = 0.5,
        timeout_sec: float | None = 10,
    ) -> None:
        """Waits until the server is ready to serve requests."""
        assert self._proc

        async def wait_http():
            async with aiohttp.ClientSession() as http:
                while True:
                    try:
                        async with http.head(f"http://localhost:{self._port}") as resp:
                            await resp.read()
                            logger.debug(f"querying server's / endpoint returned {resp.status=}")
                            return True
                    except asyncio.exceptions.CancelledError:
                        raise
                    except asyncio.exceptions.TimeoutError:
                        raise
                    except Exception as e:
                        logger.debug(f"http polling error: {e}, retrying...")
                        await asyncio.sleep(polling_sec)
                        continue

        async def wait_proc():
            await self._wait()
            raise ConnectionRefusedError("server process exited before port was ready")

        done, pending = await asyncio.wait(
            [asyncio.create_task(x) for x in [wait_http(), wait_proc()]],
            return_when=asyncio.FIRST_COMPLETED,
            timeout=timeout_sec,
        )
        [task.cancel() for task in pending]  # cancel pending tasks
        [task.result() for task in done]  # ensure an exception is thrown

    async def _wait(self) -> None:
        proc = self._proc
        assert proc

        stdout, _ = await proc.communicate()
        stdout_pretty = textwrap.indent(stdout.decode(), " | ")
        logger.debug(f"server exited with status {proc.returncode}, output:\n{stdout_pretty}")

    async def _terminate(self) -> None:
        proc = self._proc
        assert proc

        try:
            proc.terminate()
            await asyncio.sleep(2)
            proc.kill()
        except ProcessLookupError:
            pass  # process already exited
        except Exception as e:
            logger.debug(f"server failed to be terminated: {e}")
            raise
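For orientation, a hypothetical standalone use of the runner outside pytest; this assumes `llm-d-inference-sim` is on PATH and that port 18000 is free, mirroring the constants in the test module (not part of the commit):

```python
import asyncio

from utils.llm_d_inference_sim import LLMDInferenceSimRunner


async def main() -> None:
    # __aenter__ spawns the simulator and polls the port until it accepts
    # HTTP requests; __aexit__ SIGTERMs the process and drains its output.
    async with LLMDInferenceSimRunner("google/gemma-3-270m", port=18000):
        ...  # issue requests against http://127.0.0.1:18000 here


asyncio.run(main())
```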

e2e/utils/testdata.py

Lines changed: 30 additions & 0 deletions

import os
import pathlib
import subprocess

TEST_E2E_DIR = pathlib.Path(__file__).parent.parent
TEST_E2E_TESTDATA = TEST_E2E_DIR.joinpath("testdata")


def extract_tarball(name: str | pathlib.Path) -> pathlib.Path:
    """
    Extracts the tarball at the given path into the directory that the
    tarball is in.

    The returned path is the directory containing the contents of the
    tarball, named after the tarball itself without its extensions.
    """
    name = pathlib.Path(name).resolve()

    dest = name
    while dest.suffix:
        dest = dest.with_suffix("")

    if not dest.is_dir():
        if not name.is_file():
            raise FileNotFoundError(f"Tarball {name} not found!")

        os.makedirs(dest)
        subprocess.run(["tar", "-xzvf", name, "-C", dest], check=True)

    return dest
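Used against the tarball added in this commit, the helper would behave roughly as follows (a sketch, assuming it is invoked from the repository root with `e2e` on the import path):

```python
from utils.testdata import extract_tarball

# The first call extracts into a sibling directory named after the tarball
# with all suffixes stripped; subsequent calls find that directory already
# present and skip the extraction entirely.
path = extract_tarball("e2e/testdata/models/google_gemma-3-270m.tar.gz")
print(path)  # .../e2e/testdata/models/google_gemma-3-270m
```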

flake.nix

Lines changed: 9 additions & 1 deletion

@@ -56,7 +56,15 @@
         (nixRunWrap "llm-d-inference-sim")
       ];
 
-      buildInputs = [ pkgs.python3Packages.venvShellHook ];
+      buildInputs =
+        with pkgs;
+        with python3Packages;
+        [
+          numpy
+          torch
+          venvShellHook
+        ];
+
       venvDir = "venv";
     };
 

pyproject.toml

Lines changed: 2 additions & 0 deletions

@@ -115,6 +115,8 @@ docstring-code-format = false
 docstring-code-line-length = "dynamic"
 
 [tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "session"
 log_cli = true
 log_cli_level = "INFO"
 testpaths = ["."]
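A note on `asyncio_mode = "auto"`: in this mode pytest-asyncio collects bare `async def` test functions as asyncio tests, so the explicit `@pytest.mark.asyncio` marker in the new test module is redundant but harmless. A minimal sketch (hypothetical test, not part of the commit):

```python
import asyncio


# Under asyncio_mode = "auto", this coroutine runs as a test without an
# explicit @pytest.mark.asyncio marker.
async def test_event_loop_is_running() -> None:
    assert asyncio.get_running_loop() is not None
```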
