-
Notifications
You must be signed in to change notification settings - Fork 463
195 lines (183 loc) · 8.47 KB
/
Copy pathbenchmarks_pr.yml
File metadata and controls
195 lines (183 loc) · 8.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
name: Continuous Benchmark (PR)

on:
  pull_request:

permissions:
  actions: read
  contents: read

concurrency:
  # The group key is derived from github.ref instead of github.head_ref,
  # because head_ref is only populated for pull_request /
  # pull_request_target events.
  # Runs on refs/heads/main get a per-commit group (keyed by the SHA), so
  # every commit there runs to completion and a regression can be traced to
  # the commit that introduced it. Any other ref shares one group per ref, so
  # a newer run cancels the one still in progress.
  group: >-
    ${{ github.workflow }}-${{
    github.ref == 'refs/heads/main'
    && format('ci-master-{0}', github.sha)
    || format('ci-{0}', github.ref) }}
  cancel-in-progress: true
jobs:
  # Runs the benchmark suite twice on the same machine -- once on the PR base
  # commit and once on the PR head commit -- and publishes both result files
  # as an artifact, once per matrix variant (CPU and GPU).
  benchmark:
    name: ${{ matrix.device }} Pytest benchmark
    runs-on: linux.g5.4xlarge.nvidia.gpu
    env:
      # Pull-request coordinates from the event payload, exposed to every
      # shell step below.
      PR_BASE_REPO: ${{ github.event.pull_request.base.repo.full_name }}
      PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
      PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
    strategy:
      # Let the other variant finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          - device: CPU
            image: nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
          - device: GPU
            image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
    defaults:
      run:
        # Custom shell string: a login shell. Unlike the runner's built-in
        # bash invocation it does not pass -e / -o pipefail, so a script only
        # stops on errors if it calls `set -e` itself.
        shell: bash -l {0}
    container:
      image: ${{ matrix.image }}
      options: --gpus all --shm-size=8g
    steps:
      - name: Who triggered this?
        run: |
          echo "Action triggered by ${{ github.event.pull_request.html_url }}"

      - name: Check ldd --version
        run: ldd --version

      # The CUDA base images do not ship git; install it before checkout.
      - name: Install git for checkout
        run: |
          apt-get update -y
          apt-get install -y git ca-certificates

      # Full history of the PR head repository, checked out at the head commit.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Python Setup
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Setup Environment
        run: |
          export TZ=Europe/London
          export DEBIAN_FRONTEND=noninteractive # tzdata bug
          apt-get update -y
          apt-get install software-properties-common cmake -y
          add-apt-repository ppa:git-core/candidate -y
          apt-get update -y
          apt-get upgrade -y
          apt-get -y install libglu1-mesa libgl1-mesa-glx libosmesa6 gcc curl g++ unzip wget libglfw3-dev libgles2-mesa-dev libglew-dev sudo git cmake libz-dev libpython3.10-dev

      # Register the checkout as a git safe.directory so the git commands in
      # later steps accept it. GITHUB_WORKSPACE is used instead of a
      # hard-coded /__w/<repo>/<repo> path so this does not depend on the
      # repository name.
      - name: Setup git
        run: git config --global --add safe.directory "${GITHUB_WORKSPACE}"

      - name: setup Path
        run: |
          echo /usr/local/bin >> "$GITHUB_PATH"

      # Allocates the temp files that receive the two benchmark JSON reports
      # and exports their paths (plus short SHAs) to all later steps. Also
      # writes a sitecustomize.py into a temp dir; the benchmark steps put
      # that dir on PYTHONPATH.
      - name: Setup benchmarks
        run: |
          # The job shell does not enable errexit; opt in so that a failing
          # mktemp aborts the step instead of exporting an empty path.
          set -euo pipefail
          BENCHMARK_SITE_DIR="$(mktemp -d)"
          BASELINE_JSON="$(mktemp)"
          CONTENDER_JSON="$(mktemp)"
          {
            echo "BASE_SHA=${PR_BASE_SHA:0:8}"
            echo "HEAD_SHA=${PR_HEAD_SHA:0:8}"
            echo "BASELINE_JSON=${BASELINE_JSON}"
            echo "CONTENDER_JSON=${CONTENDER_JSON}"
            echo "BENCHMARK_SITE_DIR=${BENCHMARK_SITE_DIR}"
          } >> "$GITHUB_ENV"
          cat > "${BENCHMARK_SITE_DIR}/sitecustomize.py" <<'PY'
          import warnings
          try:
              import torch
              torch._dynamo.config.reorderable_logging_functions.add(warnings.warn)
          except (AttributeError, ImportError):
              pass
          PY

      - name: Install benchmark dependencies
        run: |
          set -euxo pipefail
          python3.10 -m venv --system-site-packages ./py310
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10
          # NB: the nightly/cu128 channel is frozen (torch and torchvision builds
          # drifted out of sync there, making install ResolutionImpossible). Use the
          # live cu126 nightly channel; its CUDA 12.6 wheels run fine on the GPU
          # runner via driver backward-compatibility.
          # The --extra-index-url onto PyPI is required: torch nightly pulls in
          # transitive deps (e.g. spmd-types) that are only shipped as sdists on the
          # torch channel, and building those sdists needs setuptools/wheel which the
          # torch index does not host. torch/torchvision still resolve from nightly
          # (their dev versions outrank any PyPI stable), and assert_torch_version.sh
          # below fails the job loudly if that ever stops holding.
          python3.10 -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu126 --extra-index-url https://pypi.org/simple -U
          python3.10 -m pip install ninja pytest pytest-benchmark pytest-timeout "hoptorch>=0.1.4" "mujoco>=3.8.1,<3.9.0" "dm_control>=1.0.41" "gym[accept-rom-license,atari]" transformers accelerate ray
          python3.10 -m pip install "pybind11[global]"
          python3.10 -m pip install cloudpickle packaging importlib_metadata numpy orjson "pyvers>=0.2.0,<0.3.0"
          python3.10 -m pip install --no-deps git+https://github.com/pytorch/tensordict
          python3.10 -m pip install safetensors tqdm pandas numpy matplotlib
          bash .github/unittest/helpers/assert_torch_version.sh nightly
          bash .github/unittest/helpers/assert_torch_tensordict_versions.sh nightly

      # Builds and benchmarks the PR base commit; report goes to BASELINE_JSON.
      - name: Run baseline benchmarks
        run: |
          set -euxo pipefail
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10
          export TORCHRL_BENCHMARK_DEVICE="${{ matrix.device }}"
          if [ "${TORCHRL_BENCHMARK_DEVICE}" = "CPU" ]; then
            # Empty value: no CUDA device is visible to the CPU variant.
            export CUDA_VISIBLE_DEVICES=
          fi
          # The base commit may live only in the base repository, so fetch it
          # explicitly before checking it out.
          git fetch --no-tags --depth=1 "https://github.com/${PR_BASE_REPO}.git" "${PR_BASE_SHA}"
          git checkout --detach "${PR_BASE_SHA}"
          rm -rf build
          python3.10 -m pip install -e . --no-build-isolation --no-deps
          if [ "${{ matrix.device }}" = "GPU" ]; then
            # test import and fail early if the GPU runner did not expose CUDA
            nvcc --version
            python -c "import torch; assert torch.cuda.device_count()"
            python -c "import torchrl._torchrl as ext; assert hasattr(ext, 'CudaSumSegmentTreeFp32')"
          fi
          REPO_ROOT="$(pwd)"
          cd "${REPO_ROOT}/benchmarks"
          export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
          export COMPOSITE_LP_AGGREGATE=0
          export TD_GET_DEFAULTS_TO_NONE=1
          export PYTHONPATH="${BENCHMARK_SITE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
          python -m pytest -vvv --rank 0 --timeout=240 --benchmark-only --benchmark-json "${BASELINE_JSON}" --ignore test_llm.py .

      # Same procedure on the PR head commit; report goes to CONTENDER_JSON.
      - name: Run PR benchmarks
        run: |
          set -euxo pipefail
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10
          export TORCHRL_BENCHMARK_DEVICE="${{ matrix.device }}"
          if [ "${TORCHRL_BENCHMARK_DEVICE}" = "CPU" ]; then
            # Empty value: no CUDA device is visible to the CPU variant.
            export CUDA_VISIBLE_DEVICES=
          fi
          git checkout --detach "${PR_HEAD_SHA}"
          rm -rf build
          python3.10 -m pip install -e . --no-build-isolation --no-deps
          if [ "${{ matrix.device }}" = "GPU" ]; then
            # test import and fail early if the GPU runner did not expose CUDA
            nvcc --version
            python -c "import torch; assert torch.cuda.device_count()"
            python -c "import torchrl._torchrl as ext; assert hasattr(ext, 'CudaSumSegmentTreeFp32')"
          fi
          REPO_ROOT="$(pwd)"
          cd "${REPO_ROOT}/benchmarks"
          export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1
          export COMPOSITE_LP_AGGREGATE=0
          export TD_GET_DEFAULTS_TO_NONE=1
          export PYTHONPATH="${BENCHMARK_SITE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
          python -m pytest -vvv --rank 0 --timeout=240 --benchmark-only --benchmark-json "${CONTENDER_JSON}" --ignore test_llm.py .

      # Despite its name, this step only stages files into benchmark-results/;
      # the actual upload happens in the next step. It runs even after a
      # failure, so it must cope with result files that are missing or empty.
      - name: Upload PR benchmark results
        if: ${{ always() }}
        run: |
          set -euxo pipefail
          mkdir -p benchmark-results
          # Copy a report only if its path is known and the file is non-empty.
          # The path is unset when "Setup benchmarks" never ran, and the file
          # is still the empty mktemp placeholder when its benchmark step
          # failed before pytest wrote the report.
          stage_result() {
            local src="$1" dest="$2"
            if [ -n "${src}" ] && [ -s "${src}" ]; then
              cp "${src}" "benchmark-results/${dest}"
            else
              echo "No benchmark output for ${dest}; skipping."
            fi
          }
          stage_result "${BASELINE_JSON:-}" baseline.json
          stage_result "${CONTENDER_JSON:-}" contender.json
          cat > benchmark-results/metadata.json <<EOF
          {
            "device": "${{ matrix.device }}",
            "pr_number": ${{ github.event.pull_request.number }},
            "base_sha": "${PR_BASE_SHA}",
            "head_sha": "${PR_HEAD_SHA}",
            "run_id": "${{ github.run_id }}"
          }
          EOF

      - name: Upload PR benchmark artifact
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.device }}-benchmark-pr-results
          path: benchmark-results