Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,11 @@ jobs:
with:
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@e58605a9b6da7c637471fab8847a5e5a6b8df081 # v5

- name: Install dependencies
run: |
python -m pip install pip==26.0.1
pip install -e ".[dev]"
run: uv sync --frozen --extra dev

- name: Run pre-commit
run: |
pre-commit run --all-files --show-diff-on-failure
run: uv run pre-commit run --all-files --show-diff-on-failure
29 changes: 6 additions & 23 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,14 @@ on:
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.12"]

steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install pip==26.0.1
pip install -e ".[test]"
- name: Install uv
uses: astral-sh/setup-uv@e58605a9b6da7c637471fab8847a5e5a6b8df081 # v5

- name: Run tests
run: |
pytest -xv -m "not slow and not performance" --cov=src --cov-report=xml --cov-report=html
run: uv run --frozen --extra test pytest -xv -m "not slow and not performance" --cov=src --cov-report=xml --cov-report=html

- name: Upload coverage report
uses: actions/upload-artifact@v4
Expand All @@ -43,13 +31,8 @@ jobs:
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.12"
- name: Install uv
uses: astral-sh/setup-uv@e58605a9b6da7c637471fab8847a5e5a6b8df081 # v5

- name: Audit dependencies for known vulnerabilities
run: |
python -m pip install pip==26.0.1
pip install -e ".[dev,test,performance]"
pip-audit
run: uv run --frozen --extra dev --extra test --extra performance pip-audit
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,10 @@ repos:
types: [python]
pass_filenames: true
exclude: ^(src/inference_endpoint/openai/openai_types_gen.py)$

- id: uv-lock-check
name: Check uv.lock is up-to-date
entry: uv lock --check
language: system
pass_filenames: false
files: ^pyproject\.toml$
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new uv-lock-check hook only runs when pyproject.toml changes (files: ^pyproject\.toml$). If uv.lock is modified (or accidentally corrupted) without a pyproject.toml change, this check won't run and an inconsistent lockfile could still be committed. Consider widening the files selector to also trigger on uv.lock changes (or remove the selector so it always runs when relevant).

Suggested change
files: ^pyproject\.toml$
files: ^(pyproject\.toml|uv\.lock)$

Copilot uses AI. Check for mistakes.
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12
42 changes: 28 additions & 14 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,42 @@ High-performance benchmarking tool for LLM inference endpoints targeting 50k+ QP

```bash
# Development setup
python3.12 -m venv venv && source venv/bin/activate
pip install -e ".[dev,test]"
pre-commit install
uv sync --extra dev --extra test
uv run pre-commit install

# Testing
pytest # All tests (excludes slow/performance)
pytest -m unit # Unit tests only
pytest -m integration # Integration tests only
pytest --cov=src --cov-report=html # With coverage
pytest -xvs tests/unit/path/to/test_file.py # Single test file
uv run pytest # All tests (excludes slow/performance)
uv run pytest -m unit # Unit tests only
uv run pytest -m integration # Integration tests only
uv run pytest --cov=src --cov-report=html # With coverage
uv run pytest -xvs tests/unit/path/to/test_file.py # Single test file

# Code quality (run before commits)
pre-commit run --all-files
uv run pre-commit run --all-files

# Local testing with echo server
python -m inference_endpoint.testing.echo_server --port 8765
inference-endpoint probe --endpoints http://localhost:8765 --model test-model
uv run python -m inference_endpoint.testing.echo_server --port 8765
uv run inference-endpoint probe --endpoints http://localhost:8765 --model test-model

# CLI usage
uv run inference-endpoint benchmark offline --endpoints URL --model NAME --dataset PATH
uv run inference-endpoint benchmark online --endpoints URL --model NAME --dataset PATH --load-pattern poisson --target-qps 100
uv run inference-endpoint benchmark from-config --config config.yaml
```

### Backward-compatible setup (pip + venv)

Does not use `uv.lock` — dependency versions may differ from the lockfile.

```bash
python3.12 -m venv venv && source venv/bin/activate
pip install -e ".[dev,test]"
pre-commit install

# After activating the venv, commands run without the `uv run` prefix:
pytest -m unit
pre-commit run --all-files
inference-endpoint benchmark offline --endpoints URL --model NAME --dataset PATH
inference-endpoint benchmark online --endpoints URL --model NAME --dataset PATH --load-pattern poisson --target-qps 100
inference-endpoint benchmark from-config --config config.yaml
```

## Architecture
Expand Down Expand Up @@ -345,5 +359,5 @@ Known failure modes when AI tools generate code for this project. Reference thes

### Dependency & Environment

- **Adding new dependencies without justification**: AI may `pip install` or add imports for packages not in `pyproject.toml`. Any new dependency must be justified, added to the correct optional group, and pinned to an exact version (`==`). After adding a dependency, run `pip-audit` (included in `dev` extras) to verify it has no known vulnerabilities.
- **Adding new dependencies without justification**: AI may `pip install` or add imports for packages not in `pyproject.toml`. Any new dependency must be justified, added to the correct optional group, and pinned to an exact version (`==`). After adding a dependency, run `pip-audit` (included in `dev` extras) to verify it has no known vulnerabilities. When adding dependencies, use `uv add <package>==<version>` to update both `pyproject.toml` and `uv.lock` atomically, then run `uv run pip-audit` to check for vulnerabilities.
- **Using `requests`/`aiohttp` for HTTP**: This project has its own HTTP client (`endpoint_client/http.py`) using `httptools`. AI defaults to `requests` or `aiohttp` — these should not appear in production code (test dependencies are fine).
62 changes: 44 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,81 +13,107 @@ A high-performance benchmarking tool for LLM endpoints.
# Note: This repo will be migrated to https://github.com/mlcommons/endpoints
git clone https://github.com/mlcommons/endpoints.git
cd endpoints
```

This project uses [uv](https://docs.astral.sh/uv/) for dependency management. All dependencies are pinned in `uv.lock`.

```bash
# Install dependencies
uv sync

# For development (includes linting, testing, and type-checking tools)
uv sync --extra dev --extra test
uv run pre-commit install
```

# Create virtual environment
python3.12 -m venv venv
source venv/bin/activate
<details>
<summary>Using pip + venv instead (backward-compatible)</summary>

# As a user
pip install .
> **Note:** pip installs from `pyproject.toml` directly and does not use `uv.lock`. Dependency versions may differ.

# As a developer (with development and test extras)
```bash
python3.12 -m venv venv && source venv/bin/activate
pip install -e ".[dev,test]"
pre-commit install
```

After activating the venv, all commands below work without the `uv run` prefix.

</details>

### Basic Usage

```bash
# Show help
inference-endpoint --help
uv run inference-endpoint --help

# Show system information
inference-endpoint -v info
uv run inference-endpoint -v info

# Test endpoint connectivity
inference-endpoint probe \
uv run inference-endpoint probe \
--endpoints http://your-endpoint:8000 \
--model Qwen/Qwen3-8B

# Run offline benchmark (max throughput - uses all dataset samples)
inference-endpoint benchmark offline \
uv run inference-endpoint benchmark offline \
--endpoints http://your-endpoint:8000 \
--model Qwen/Qwen3-8B \
--dataset tests/datasets/dummy_1k.jsonl

# Run online benchmark (sustained QPS - requires --target-qps, --load-pattern)
inference-endpoint benchmark online \
uv run inference-endpoint benchmark online \
--endpoints http://your-endpoint:8000 \
--model Qwen/Qwen3-8B \
--dataset tests/datasets/dummy_1k.jsonl \
--load-pattern poisson \
--target-qps 100

# With explicit sample count
inference-endpoint benchmark offline \
uv run inference-endpoint benchmark offline \
--endpoints http://your-endpoint:8000 \
--model Qwen/Qwen3-8B \
--dataset tests/datasets/dummy_1k.jsonl \
--num-samples 5000

# ... or activate the venv to skip the `uv run` prefix:
# source .venv/bin/activate
# inference-endpoint --help
# inference-endpoint benchmark offline --endpoints URL --model NAME --dataset PATH
```

### Running Locally

```bash
# Start local echo server
python3 -m inference_endpoint.testing.echo_server --port 8765 &
uv run python -m inference_endpoint.testing.echo_server --port 8765 &

# Test with dummy dataset (included in repo)
inference-endpoint benchmark offline \
uv run inference-endpoint benchmark offline \
--endpoints http://localhost:8765 \
--model Qwen/Qwen3-8B \
--dataset tests/datasets/dummy_1k.jsonl

# Stop echo server
pkill -f echo_server

# ... or with an activated venv (source .venv/bin/activate):
# python -m inference_endpoint.testing.echo_server --port 8765 &
# inference-endpoint benchmark offline --endpoints http://localhost:8765 --model Qwen/Qwen3-8B --dataset tests/datasets/dummy_1k.jsonl
```

See [Local Testing Guide](docs/LOCAL_TESTING.md) for detailed instructions.

### Running Tests and Examples

```bash
# Install test dependencies
pip install ".[test]"
uv run pytest -m "not performance and not run_explicitly"
uv run pytest -m unit
uv run pytest --cov=src --cov-report=html

# Run tests (excluding performance and explicit-run tests)
pytest -m "not performance and not run_explicitly"
# ... or with an activated venv (source .venv/bin/activate):
# pytest -m "not performance and not run_explicitly"
# pytest -m unit

# Run examples: follow instructions in examples/*/README.md
```
Expand Down
33 changes: 16 additions & 17 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
[build-system]
requires = ["setuptools==78.1.1", "wheel==0.46.3"]
build-backend = "setuptools.build_meta"
requires = ["uv_build>=0.7.6,<0.8"]
build-backend = "uv_build"
Comment on lines 1 to +3
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switching the build backend to uv_build changes how sdists/wheels are produced, but the CI workflows only run uv sync + pytest and don't exercise the packaging path. To catch packaging regressions early, consider adding a CI step that builds the distribution artifacts and performs a minimal smoke test (e.g., install the built wheel and import/run the CLI).

Copilot uses AI. Check for mistakes.

[tool.uv]
index-url = "https://pypi.org/simple"
environments = [
"sys_platform == 'linux' and platform_machine == 'x86_64'",
"sys_platform == 'linux' and platform_machine == 'aarch64'",
"sys_platform == 'darwin' and platform_machine == 'x86_64'",
"sys_platform == 'darwin' and platform_machine == 'arm64'",
]

[tool.uv-build]
module-root = "src"
data = {"inference_endpoint" = ["config/templates/*.yaml"]}
exclude = ["evaluation/livecodebench/_server.py"]

[project]
name = "inference-endpoint"
Expand Down Expand Up @@ -112,21 +126,6 @@ Documentation = "https://github.com/mlperf/inference-endpoint#readme"
Repository = "https://github.com/mlperf/inference-endpoint.git"
Issues = "https://github.com/mlperf/inference-endpoint/issues"

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-dir]
"" = "src"

[tool.setuptools.package-data]
inference_endpoint = ["config/templates/*.yaml"]

[tool.setuptools.exclude-package-data]
"inference_endpoint.evaluation.livecodebench" = ["_server.py"]

[tool.autopep8]
max_line_length = 88

[tool.ruff]
target-version = "py312"
line-length = 88
Expand Down
15 changes: 10 additions & 5 deletions scripts/Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@

FROM python:3.12.11-slim

# Copy uv binary from official image
COPY --from=ghcr.io/astral-sh/uv:0.7.6 /uv /uvx /bin/

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
UV_COMPILE_BYTECODE=1 \
UV_LINK_MODE=copy

# Install system dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends build-essential procps \
apt-get install -y --no-install-recommends build-essential procps \
&& rm -rf /var/lib/apt/lists/*

RUN mkdir /mnt/inference-endpoint
WORKDIR /mnt/inference-endpoint
COPY pyproject.toml .

# Copy lockfile + project metadata first for Docker layer caching
COPY pyproject.toml uv.lock .python-version ./
COPY src/ ./src/

# Create a non-root user for security
Expand All @@ -33,4 +38,4 @@ RUN if ! getent group ${GROUP_ID}; then \
USER appuser
ENV PATH="/home/appuser/.local/bin:$PATH"

RUN pip install -e .[dev,test]
RUN uv sync --frozen --extra dev --extra test
Loading
Loading