# Captured from the GitHub Actions "Workflow file for this run" view.
# Run: [BugFix] Make the vLLM FP32 plugin opt-in so importing torchrl can't … #9905

name: Continuous Benchmark

# Triggers: pushes to main, manual dispatch, and reuse from other workflows.
on:
  push:
    branches:
      - main
  workflow_dispatch:
  workflow_call:
    inputs:
      # Lets a calling workflow run the benchmarks without publishing them.
      skip-upload:
        description: 'Skip benchmark upload to gh-pages'
        type: boolean
        default: false

permissions:
  id-token: write
  deployments: write
  # The upload job stores results with `auto-push: true` on the gh-pages
  # branch, which needs write access to repository contents.
  contents: write

concurrency:
  # Documentation suggests ${{ github.head_ref }}, but that's only available
  # on pull_request/pull_request_target triggers, so using ${{ github.ref }}.
  # On main, the group is keyed by commit SHA, so every build completes even
  # if merging happens faster; this makes it easier to discover at which
  # point something broke. On any other ref, runs share one group per ref and
  # a newer run cancels the one in progress.
  group: benchmarks-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}
  cancel-in-progress: true
jobs:
  # Runs the pytest benchmark suite once per matrix entry. Both entries use
  # the same GPU runner type; the CPU entry hides the GPUs from the process
  # by exporting an empty CUDA_VISIBLE_DEVICES in the "Run" step.
  benchmark:
    name: ${{ matrix.device }} Pytest benchmark
    runs-on: linux.g5.4xlarge.nvidia.gpu
    strategy:
      # Keep the other device running when one matrix entry fails.
      fail-fast: false
      matrix:
        include:
          - device: CPU
            image: nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
          - device: GPU
            # The GPU entry calls `nvcc --version` in the "Run" step, which
            # is presumably why it uses the -devel image.
            image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
    defaults:
      run:
        shell: bash -l {0}
    container:
      image: ${{ matrix.image }}
      options: --gpus all --shm-size=8g
    steps:
      - name: Who triggered this?
        # Context values are passed through the environment instead of being
        # interpolated into the script text, so a crafted ref name cannot be
        # interpreted as shell syntax.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF: ${{ github.ref }}
        run: |
          echo "Action triggered by ${EVENT_NAME} on ${REF}"
      - name: Check ldd --version
        run: ldd --version
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 50  # this is to make sure we obtain the target base commit
      - name: Python Setup
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Setup Environment
        run: |
          export TZ=Europe/London
          export DEBIAN_FRONTEND=noninteractive # tzdata bug
          apt-get update -y
          apt-get install software-properties-common cmake -y
          add-apt-repository ppa:git-core/candidate -y
          apt-get update -y
          apt-get upgrade -y
          apt-get -y install libglu1-mesa libgl1-mesa-glx libosmesa6 gcc curl g++ unzip wget libglfw3-dev libgles2-mesa-dev libglew-dev sudo git cmake libz-dev libpython3.10-dev
      - name: Setup git
        run: |
          git config --global --add safe.directory /__w/rl/rl
          git config --global user.name "github-action-benchmark"
          git config --global user.email "github@users.noreply.github.com"
      - name: setup Path
        run: |
          echo /usr/local/bin >> "$GITHUB_PATH"
      # Creates a Python 3.10 venv, installs nightly torch/torchvision and the
      # benchmark dependencies, installs this repository in editable mode,
      # then runs the benchmarks from benchmarks/ and writes output.json.
      - name: Run
        run: |
          set -euxo pipefail
          python3.10 -m venv --system-site-packages ./py310
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10
          export TORCHRL_BENCHMARK_DEVICE="${{ matrix.device }}"
          if [ "${TORCHRL_BENCHMARK_DEVICE}" = "CPU" ]; then
            export CUDA_VISIBLE_DEVICES=
          fi
          # NB: the nightly/cu128 channel is frozen (torch and torchvision builds
          # drifted out of sync there, making install ResolutionImpossible). Use the
          # live cu126 nightly channel; its CUDA 12.6 wheels run fine on the GPU
          # runner via driver backward-compatibility.
          # The --extra-index-url onto PyPI is required: torch nightly pulls in
          # transitive deps (e.g. spmd-types) that are only shipped as sdists on the
          # torch channel, and building those sdists needs setuptools/wheel which the
          # torch index does not host. torch/torchvision still resolve from nightly
          # (their dev versions outrank any PyPI stable), and assert_torch_version.sh
          # below fails the job loudly if that ever stops holding.
          python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu126 --extra-index-url https://pypi.org/simple -U
          python3.10 -m pip install ninja pytest pytest-benchmark pytest-timeout "hoptorch>=0.1.4" "mujoco>=3.8.1,<3.9.0" "dm_control>=1.0.41" "gym[accept-rom-license,atari]" transformers accelerate
          python -m pip install "pybind11[global]"
          python3.10 -m pip install cloudpickle packaging importlib_metadata numpy orjson "pyvers>=0.2.0,<0.3.0"
          python3.10 -m pip install --no-deps git+https://github.com/pytorch/tensordict
          python3.10 -m pip install safetensors tqdm pandas numpy matplotlib ray
          python3.10 -m pip install -e . --no-build-isolation --no-deps
          bash .github/unittest/helpers/assert_torch_version.sh nightly
          bash .github/unittest/helpers/assert_torch_tensordict_versions.sh nightly
          if [ "${{ matrix.device }}" = "GPU" ]; then
            # test import and fail early if the GPU runner did not expose CUDA
            nvcc --version
            python -c "import torch; assert torch.cuda.device_count()"
            python -c "import torchrl._torchrl as ext; assert hasattr(ext, 'CudaSumSegmentTreeFp32')"
          fi
          cd benchmarks/
          export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
          export COMPOSITE_LP_AGGREGATE=0
          export TD_GET_DEFAULTS_TO_NONE=1
          # Put a sitecustomize.py on PYTHONPATH so Python processes started
          # below register warnings.warn as a reorderable logging function for
          # torch._dynamo. Best-effort: a missing torch or attribute is ignored.
          BENCHMARK_SITE_DIR="$(mktemp -d)"
          cat > "${BENCHMARK_SITE_DIR}/sitecustomize.py" <<'PY'
          import warnings
          try:
              import torch
              torch._dynamo.config.reorderable_logging_functions.add(warnings.warn)
          except (AttributeError, ImportError):
              pass
          PY
          export PYTHONPATH="${BENCHMARK_SITE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
          python -m pytest -vvv --rank 0 --timeout=240 --benchmark-only --benchmark-json output.json --ignore test_llm.py .
      # Upload benchmark results for main branch and manual dispatch runs.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: ${{ !inputs.skip-upload && (github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch') }}
        with:
          # One artifact per matrix entry: CPU-benchmark-results and
          # GPU-benchmark-results.
          name: ${{ matrix.device }}-benchmark-results
          path: benchmarks/output.json
# Upload benchmark results to gh-pages branch.
  benchmark-upload:
    name: Upload benchmark results
    runs-on: ubuntu-latest
    # Waits for every matrix entry of the benchmark job to finish.
    needs: benchmark
    if: ${{ !inputs.skip-upload && (github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch') }}
    steps:
      - name: Show upload trigger reason
        # Context values are passed through the environment instead of being
        # interpolated into the script text, so a crafted ref name cannot be
        # interpreted as shell syntax.
        env:
          REF: ${{ github.ref }}
          EVENT_NAME: ${{ github.event_name }}
        run: |
          if [ "${REF}" == "refs/heads/main" ]; then
            echo "Uploading benchmarks because this is the main branch"
          elif [ "${EVENT_NAME}" == "workflow_dispatch" ]; then
            echo "Uploading benchmarks because of manual workflow dispatch"
          fi
      - name: Checkout
        uses: actions/checkout@v4
      - name: Download CPU benchmark results
        uses: actions/download-artifact@v4
        with:
          name: CPU-benchmark-results
          path: cpu-results
      - name: Download GPU benchmark results
        uses: actions/download-artifact@v4
        with:
          name: GPU-benchmark-results
          path: gpu-results
      # Informational only: reports whether each downloaded result file is
      # present; it does not fail the job when one is missing.
      - name: Show benchmark results summary
        run: |
          echo "=== CPU Benchmark Results ==="
          if [ -f "cpu-results/output.json" ]; then
            echo "CPU results found and will be uploaded"
          else
            echo "No CPU results found"
          fi
          echo "=== GPU Benchmark Results ==="
          if [ -f "gpu-results/output.json" ]; then
            echo "GPU results found and will be uploaded"
          else
            echo "No GPU results found"
          fi
      - name: Store CPU benchmark results
        uses: benchmark-action/github-action-benchmark@v1
        with:
          name: CPU Benchmark Results
          tool: 'pytest'
          output-file-path: cpu-results/output.json
          fail-on-alert: true
          alert-threshold: '200%'
          alert-comment-cc-users: '@vmoens'
          comment-on-alert: false
          github-token: ${{ secrets.GITHUB_TOKEN }}
          gh-pages-branch: gh-pages
          auto-push: true
          # Cap the on-disk benchmark history (dev/bench/data.js) so it cannot
          # grow without bound and bloat the repository / clone size.
          max-items-in-chart: 250
      - name: Store GPU benchmark results
        uses: benchmark-action/github-action-benchmark@v1
        with:
          name: GPU Benchmark Results
          tool: 'pytest'
          output-file-path: gpu-results/output.json
          fail-on-alert: true
          alert-threshold: '200%'
          alert-comment-cc-users: '@vmoens'
          comment-on-alert: false
          github-token: ${{ secrets.GITHUB_TOKEN }}
          gh-pages-branch: gh-pages
          auto-push: true
          # Cap the on-disk benchmark history (dev/bench/data.js) so it cannot
          # grow without bound and bloat the repository / clone size.
          max-items-in-chart: 250
          # Use regular comments instead of PR reviews to avoid permission issues
          # NOTE(review): this input is set only on the GPU step; confirm the
          # pinned action version accepts `comment-style`.
          comment-style: 'github'