rl/.github/workflows/benchmarks_pr.yml at 3d5124fca4edd8c7e365d15ec99fa3692903f7b2 · pytorch/rl · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Workflow metadata: display name, trigger, token scope and run de-duplication.
name: "Continuous Benchmark (PR)"
on:
  pull_request:

# The workflow token is limited to read access.
permissions:
  actions: read
  contents: read

concurrency:
  # The documentation suggests keying the group on ${{ github.head_ref }}, but that value only
  # exists for pull_request/pull_request_target triggers, so ${{ github.ref }} is used instead.
  # Runs on refs/heads/main get a per-commit group (keyed on the SHA), so all of them complete
  # even when merges land quickly, which makes it easier to find the point where something broke.
  # Every other ref shares one group per ref, so a newer run cancels the one still in progress.
  group: "${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && format('ci-master-{0}', github.sha) || format('ci-{0}', github.ref) }}"
  cancel-in-progress: true

jobs:

  benchmark:
    # One job per matrix entry; the device label is part of the displayed job name.
    name: ${{ matrix.device }} Pytest benchmark
    runs-on: linux.g5.4xlarge.nvidia.gpu
    strategy:
      # A failure in one matrix entry does not cancel the other.
      fail-fast: false
      matrix:
        include:
          # CPU entry: cudnn-runtime image.
          - image: nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
            device: CPU
          # GPU entry: cudnn-devel image (the GPU-only checks in the benchmark steps call nvcc).
          - image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
            device: GPU
    container:
      image: ${{ matrix.image }}
      options: --gpus all --shm-size=8g
    defaults:
      run:
        # Custom shell template: every run step is executed by a login bash shell.
        shell: bash -l {0}
    env:
      # Pull-request coordinates, exposed to the shell steps as plain environment variables.
      PR_BASE_REPO: ${{ github.event.pull_request.base.repo.full_name }}
      PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
      PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
    steps:
      # Log the URL of the pull request this run belongs to.
      - name: Who triggered this?
        run: echo "Action triggered by ${{ github.event.pull_request.html_url }}"
      # Record the ldd version reported inside the container.
      - name: Check ldd --version
        run: |
          ldd --version
      # Per the step name, git (plus CA certificates) is installed so the checkout step can use it.
      - name: Install git for checkout
        run: |
          # Refresh the package index first, then install both packages.
          apt-get update -y
          apt-get install -y ca-certificates git
      # Check out the pull request's head commit from the head repository, with full history
      # (fetch-depth 0).
      - name: Checkout
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.event.pull_request.head.sha }}
          fetch-depth: 0
      # Provide a Python 3.10 interpreter for the following steps.
      - name: Python Setup
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the string 3.10 instead of reading the float 3.1.
          python-version: "3.10"
      # Install the system packages (GL/GLFW/GLEW libraries, compilers, Python headers, ...)
      # needed by the later build and benchmark steps.
      - name: Setup Environment
        run: |
          # NOTE(review): this script never calls `set -e` and the job's shell template is
          # `bash -l {0}`, so an early apt failure does not stop it; only the status of the
          # last command decides the step result. Confirm that this is intended.
          # tzdata bug: preset the time zone and use the non-interactive frontend.
          export DEBIAN_FRONTEND=noninteractive
          export TZ=Europe/London
          apt-get update -y
          apt-get install -y software-properties-common cmake
          # Add the git-core candidate PPA, then refresh and upgrade against it.
          add-apt-repository -y ppa:git-core/candidate
          apt-get update -y
          apt-get upgrade -y
          apt-get install -y \
            libglu1-mesa libgl1-mesa-glx libosmesa6 \
            gcc curl g++ unzip wget \
            libglfw3-dev libgles2-mesa-dev libglew-dev \
            sudo git cmake libz-dev libpython3.10-dev
      # Register the job workspace as a git safe.directory so the later git commands are not
      # rejected by git's repository-ownership check.
      # The path comes from the GITHUB_WORKSPACE shell variable (resolved inside the container at
      # run time) instead of a hard-coded /__w/rl/rl, so the step keeps working when the
      # repository, and therefore the workspace path, has a different name (e.g. a renamed fork).
      - name: Setup git
        run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
      # Add /usr/local/bin to PATH for all subsequent steps via the GITHUB_PATH file.
      - name: setup Path
        run: echo /usr/local/bin >> "$GITHUB_PATH"
      # Prepare the state shared by the benchmark steps and publish it through GITHUB_ENV:
      #   BASE_SHA / HEAD_SHA           - first 8 characters of the base / head commit SHAs
      #   BASELINE_JSON / CONTENDER_JSON - temp files that receive the pytest-benchmark JSON
      #   BENCHMARK_SITE_DIR            - temp directory holding a generated sitecustomize.py
      - name: Setup benchmarks
        run: |
          # The job's custom shell template (bash -l {0}) does not enable fail-fast, so turn it
          # on here like the later steps do: a failed mktemp must not lead to exporting empty
          # paths or to writing sitecustomize.py into "/".
          set -euo pipefail
          # Assignments (unlike command substitutions nested in an echo argument) propagate a
          # mktemp failure to errexit.
          BENCHMARK_SITE_DIR="$(mktemp -d)"
          baseline_json="$(mktemp)"
          contender_json="$(mktemp)"
          {
            echo "BASE_SHA=${PR_BASE_SHA:0:8}"
            echo "HEAD_SHA=${PR_HEAD_SHA:0:8}"
            echo "BASELINE_JSON=${baseline_json}"
            echo "CONTENDER_JSON=${contender_json}"
            echo "BENCHMARK_SITE_DIR=${BENCHMARK_SITE_DIR}"
          } >> "$GITHUB_ENV"
          # The benchmark steps prepend BENCHMARK_SITE_DIR to PYTHONPATH, which lets Python pick
          # up this sitecustomize.py at interpreter start-up. It registers warnings.warn as a
          # reorderable logging function for torch._dynamo and does nothing when torch (or that
          # config attribute) is unavailable.
          cat > "${BENCHMARK_SITE_DIR}/sitecustomize.py" <<'PY'
          import warnings

          try:
              import torch

              torch._dynamo.config.reorderable_logging_functions.add(warnings.warn)
          except (AttributeError, ImportError):
              pass
          PY
      # Create the ./py310 virtual environment and install everything the benchmarks need;
      # the benchmark steps re-activate this environment.
      - name: Install benchmark dependencies
        run: |
          set -euxo pipefail
          python3.10 -m venv --system-site-packages ./py310
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10

          # Why the cu126 nightly channel: nightly/cu128 is frozen (its torch and torchvision
          # builds drifted out of sync, so installs end in ResolutionImpossible). The live
          # cu126 nightly is used instead; its CUDA 12.6 wheels run fine on the GPU runner
          # thanks to driver backward-compatibility.
          # Why PyPI is added as an extra index: torch nightly pulls in transitive deps
          # (e.g. spmd-types) that the torch channel ships only as sdists, and building those
          # needs setuptools/wheel, which the torch index does not host. torch/torchvision
          # still resolve from nightly (their dev versions outrank any PyPI stable), and
          # assert_torch_version.sh at the end of this step fails the job loudly if that ever
          # stops holding.
          python3.10 -m pip install -U --pre torch torchvision \
            --index-url https://download.pytorch.org/whl/nightly/cu126 \
            --extra-index-url https://pypi.org/simple
          python3.10 -m pip install \
            ninja pytest pytest-benchmark pytest-timeout \
            "hoptorch>=0.1.4" "mujoco>=3.8.1,<3.9.0" "dm_control>=1.0.41" \
            "gym[accept-rom-license,atari]" \
            transformers accelerate ray
          python3.10 -m pip install "pybind11[global]"
          python3.10 -m pip install \
            cloudpickle packaging importlib_metadata numpy orjson "pyvers>=0.2.0,<0.3.0"
          # tensordict is installed from its git repository, without its dependencies.
          python3.10 -m pip install --no-deps git+https://github.com/pytorch/tensordict
          python3.10 -m pip install safetensors tqdm pandas numpy matplotlib

          # Sanity checks on the installed torch / tensordict versions.
          bash .github/unittest/helpers/assert_torch_version.sh nightly
          bash .github/unittest/helpers/assert_torch_tensordict_versions.sh nightly
      # Benchmark the pull request's base commit; results are written to BASELINE_JSON.
      - name: Run baseline benchmarks
        run: |
          set -euxo pipefail
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10
          export TORCHRL_BENCHMARK_DEVICE="${{ matrix.device }}"
          # The CPU entry runs with an empty CUDA_VISIBLE_DEVICES.
          if [[ "${TORCHRL_BENCHMARK_DEVICE}" == "CPU" ]]; then
            export CUDA_VISIBLE_DEVICES=
          fi

          # Fetch the base commit directly from the base repository, switch to it, and
          # reinstall the package from that tree after dropping the old build directory.
          git fetch --no-tags --depth=1 "https://github.com/${PR_BASE_REPO}.git" "${PR_BASE_SHA}"
          git checkout --detach "${PR_BASE_SHA}"
          rm -rf build
          python3.10 -m pip install -e . --no-build-isolation --no-deps

          if [[ "${TORCHRL_BENCHMARK_DEVICE}" == "GPU" ]]; then
            # test import and fail early if the GPU runner did not expose CUDA
            nvcc --version
            python -c "import torch; assert torch.cuda.device_count()"
            python -c "import torchrl._torchrl as ext; assert hasattr(ext, 'CudaSumSegmentTreeFp32')"
          fi

          # These exports deliberately stay after the install and import checks above, so they
          # only affect the pytest run.
          benchmarks_dir="$(pwd)/benchmarks"
          cd "${benchmarks_dir}"
          export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1 COMPOSITE_LP_AGGREGATE=0 TD_GET_DEFAULTS_TO_NONE=1
          export PYTHONPATH="${BENCHMARK_SITE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
          python -m pytest -vvv --rank 0 --timeout=240 --benchmark-only \
            --benchmark-json "${BASELINE_JSON}" \
            --ignore test_llm.py .
      # Benchmark the pull request's head commit; results are written to CONTENDER_JSON.
      - name: Run PR benchmarks
        run: |
          set -euxo pipefail
          source ./py310/bin/activate
          export PYTHON_INCLUDE_DIR=/usr/include/python3.10
          export TORCHRL_BENCHMARK_DEVICE="${{ matrix.device }}"
          # The CPU entry runs with an empty CUDA_VISIBLE_DEVICES.
          if [[ "${TORCHRL_BENCHMARK_DEVICE}" == "CPU" ]]; then
            export CUDA_VISIBLE_DEVICES=
          fi

          # Switch to the head commit and reinstall the package from that tree after dropping
          # the old build directory.
          git checkout --detach "${PR_HEAD_SHA}"
          rm -rf build
          python3.10 -m pip install -e . --no-build-isolation --no-deps

          if [[ "${TORCHRL_BENCHMARK_DEVICE}" == "GPU" ]]; then
            # test import and fail early if the GPU runner did not expose CUDA
            nvcc --version
            python -c "import torch; assert torch.cuda.device_count()"
            python -c "import torchrl._torchrl as ext; assert hasattr(ext, 'CudaSumSegmentTreeFp32')"
          fi

          # These exports deliberately stay after the install and import checks above, so they
          # only affect the pytest run.
          benchmarks_dir="$(pwd)/benchmarks"
          cd "${benchmarks_dir}"
          export TORCHDYNAMO_INLINE_INBUILT_NN_MODULES=1 COMPOSITE_LP_AGGREGATE=0 TD_GET_DEFAULTS_TO_NONE=1
          export PYTHONPATH="${BENCHMARK_SITE_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
          python -m pytest -vvv --rank 0 --timeout=240 --benchmark-only \
            --benchmark-json "${CONTENDER_JSON}" \
            --ignore test_llm.py .
      # Despite its name, this step only stages files: it copies both result files into
      # ./benchmark-results and writes a metadata.json next to them. It runs even when an
      # earlier step failed (always()).
      - name: Upload PR benchmark results
        if: always()
        run: |
          set -euxo pipefail
          results_dir=benchmark-results
          mkdir -p "${results_dir}"
          cp "${BASELINE_JSON}" "${results_dir}/baseline.json"
          cp "${CONTENDER_JSON}" "${results_dir}/contender.json"
          # Unquoted heredoc delimiter: the shell expands the PR_* variables below.
          cat > "${results_dir}/metadata.json" <<EOF
          {
            "device": "${{ matrix.device }}",
            "pr_number": ${{ github.event.pull_request.number }},
            "base_sha": "${PR_BASE_SHA}",
            "head_sha": "${PR_HEAD_SHA}",
            "run_id": "${{ github.run_id }}"
          }
          EOF
      # Publish the benchmark-results directory as a per-device artifact, even when an earlier
      # step failed (always()).
      - name: Upload PR benchmark artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          path: benchmark-results
          name: ${{ matrix.device }}-benchmark-pr-results