# .github/workflows/ci-tests-fabric.yml
name: Test Fabric

# Events that start this workflow.
# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on:
  push:
    branches:
      - master
      - "release/*"
  pull_request:
    branches:
      - master
      - "release/*"
    # `ready_for_review` is included since draft PRs are skipped
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    # Only changes touching these paths trigger the workflow; `!` entries exclude paths again.
    paths:
      - ".actions/*"
      - "requirements/ci.txt"
      - "requirements/fabric/**"
      - "src/lightning/fabric/**"
      - "src/lightning_fabric/*"
      - "tests/tests_fabric/**"
      - "pyproject.toml" # includes pytest config
      - ".github/workflows/ci-tests-fabric.yml"
      - "!requirements/*/docs.txt"
      - "!*.md"
      - "!**/*.md"
  schedule:
    # Once a day, at minute 0 of hour 0
    - cron: "0 0 * * *"

# One concurrency group per workflow + ref + PR head ref;
# an in-progress run is cancelled only when the event is a pull request.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

# Every `run` step uses bash unless it overrides the shell itself.
defaults:
  run:
    shell: bash

jobs:
  # Runs the Fabric test-suite on CPU runners for every combination of
  # `matrix.os` and `matrix.config` (package flavour, Python and PyTorch version).
  fabric-cpu:
    runs-on: ${{ matrix.os }}
    # Do not run for draft pull requests
    if: github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        os: [macOS-14, ubuntu-22.04, windows-2022]
        config:
          # Test unified "lightning" package with PyTorch 2.1-2.5
          - { pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" }
          - { pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" }
          - { pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" }
          - { pkg-name: "lightning", python-version: "3.12.7", pytorch-version: "2.4.1" }
          - { pkg-name: "lightning", python-version: "3.12.7", pytorch-version: "2.5.1" }
          # Test "fabric" package with PyTorch 2.6-2.10
          - { pkg-name: "fabric", python-version: "3.12.7", pytorch-version: "2.6" }
          - { pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
          - { pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.8" }
          - { pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.9" }
          - { pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.10" }
          - { pkg-name: "fabric", python-version: "3.13", pytorch-version: "2.8" }
          - { pkg-name: "fabric", python-version: "3.13", pytorch-version: "2.9" }
          - { pkg-name: "fabric", python-version: "3.13", pytorch-version: "2.10" }
          # Test minimum supported versions (oldest); no `python-version` here,
          # so steps below fall back to '3.10'
          - { pkg-name: "fabric", pytorch-version: "2.1", requires: "oldest" }
    timeout-minutes: 25 # because of building grpcio on Mac
    env:
      PACKAGE_NAME: ${{ matrix.config.pkg-name }}
      # Evaluates to false on `master` and `release/*` branch refs, true on every other ref
      FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
      TORCH_URL_STABLE: "https://download.pytorch.org/whl/cpu/"
      TORCH_URL_TEST: "https://download.pytorch.org/whl/test/cpu/"
      # TODO: Remove this - Enable running MPS tests on this platform
      DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
      UV_TORCH_BACKEND: "cpu"
      # Supply-chain guard: skip PyPI releases newer than this. See https://docs.astral.sh/uv/reference/settings/#exclude-newer
      UV_EXCLUDE_NEWER: "2 days"
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Install uv and set Python version
        uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
        with:
          # the "oldest" matrix entry defines no python-version, hence the fallback
          python-version: ${{ matrix.config.python-version || '3.10' }}
          # TODO: Avoid activating environment like this
          # see: https://github.com/astral-sh/setup-uv/tree/v6/?tab=readme-ov-file#activate-environment
          activate-environment: true
          enable-cache: true

      - name: Basic setup
        run: uv pip install -q -r .actions/requirements.txt

      # The next three steps export OS-specific variables to all later steps via $GITHUB_ENV.
      - name: Append Env. vars for Linux
        if: ${{ runner.os == 'Linux' }}
        run: echo "GLOO_SOCKET_IFNAME=eth0" >> "$GITHUB_ENV"

      - name: Append Env. vars for MacOS
        if: ${{ runner.os == 'macOS' }}
        run: echo "GLOO_SOCKET_IFNAME=lo0" >> "$GITHUB_ENV"

      - name: Append Env. vars for Windows
        if: ${{ runner.os == 'Windows' }}
        run: |
          # Avoid issue on Windows with PyTorch 2.4: "RuntimeError: use_libuv was requested but PyTorch was build without libuv support"
          echo "USE_LIBUV=0" >> "$GITHUB_ENV"

      - name: Set min. dependencies
        if: ${{ matrix.config.requires == 'oldest' }}
        run: |
          uv pip install -U "lightning-utilities[cli]"
          python -m lightning_utilities.cli requirements set-oldest \
            --req_files "['requirements/fabric/base.txt', 'requirements/fabric/strategies.txt', 'requirements/fabric/test.txt']"
          uv pip install "cython<3.0" wheel
          uv pip install "pyyaml==6.0.0" --no-build-isolation
          uv pip install -r requirements/ci.txt
          # This script removes any line containing "error::FutureWarning" from pyproject.toml
          python .actions/assistant.py prune_pytest_as_errors

      - name: Adjust PyTorch versions in requirements files
        if: ${{ matrix.config.requires != 'oldest' }}
        run: |
          uv pip install -q -r requirements/ci.txt
          python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
          # iterate over the glob directly instead of parsing `ls` output
          for fpath in requirements/**/*.txt; do
            python ./adjust-torch-versions.py "$fpath" "${{ matrix.config.pytorch-version }}"
          done

      - name: Expand Env. variables
        run: |
          # Switch PyTorch URL between stable and test/future
          # NOTE(review): only version '2.7' is routed to the test index - confirm this is still intended
          python -c "print('TORCH_URL=' + str('${{env.TORCH_URL_TEST}}' if '${{ matrix.config.pytorch-version }}' == '2.7' else '${{env.TORCH_URL_STABLE}}'))" >> "$GITHUB_ENV"
          # Switch coverage scope
          python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.config.pkg-name}}' == 'lightning' else 'lightning_fabric'))" >> "$GITHUB_ENV"
          # if you install mono-package set dependency only for this subpackage
          python -c "print('EXTRA_PREFIX=' + str('' if '${{matrix.config.pkg-name}}' != 'lightning' else 'fabric-'))" >> "$GITHUB_ENV"

      - name: Install package & dependencies
        timeout-minutes: 20
        run: |
          uv pip install ".[${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" \
            --upgrade \
            --find-links="${TORCH_URL}"
          # ensure pkg_resources is available for older python versions & packages like onnxruntime that require it
          uv pip install "setuptools<80.10.3"
          uv pip list

      - name: Adjust tests
        if: ${{ matrix.config.pkg-name != 'lightning' }}
        run: |
          python .actions/assistant.py copy_replace_imports --source_dir="./tests" \
            --source_import="lightning.fabric" --target_import="lightning_fabric"

      - name: Testing Warnings
        working-directory: tests/tests_fabric
        # needs to run outside `pytest`
        run: python utilities/test_warnings.py

      - name: Testing Fabric
        working-directory: tests/tests_fabric
        # NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
        run: |
          echo $GITHUB_RUN_ID
          python -m coverage run --source ${{ env.COVERAGE_SCOPE }} \
            -m pytest -v --timeout=60 --durations=50 --random-order-seed=$GITHUB_RUN_ID \
            --junitxml=junit.xml -o junit_family=legacy # NOTE: for Codecov's test results

      - name: Statistics
        if: success()
        working-directory: tests/tests_fabric
        run: |
          coverage report
          coverage xml

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 # v6.0.1
        # see: https://github.com/actions/toolkit/issues/399
        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          # `files` supersedes the deprecated `file` input
          files: tests/tests_fabric/coverage.xml
          # same Python fallback as the setup step, so the "oldest" job is not flagged as a bare `python`
          flags: ${{ env.COVERAGE_SCOPE }},cpu,pytest,python${{ matrix.config.python-version || '3.10' }}
          name: CPU-coverage
          fail_ci_if_error: false

      - name: Minimize uv cache
        run: uv cache prune --ci

  # Single follow-up job that inspects the aggregated result of the whole `fabric-cpu` matrix.
  fabric-cpu-guardian:
    runs-on: ubuntu-latest
    needs: fabric-cpu
    # run even when `fabric-cpu` failed, was cancelled or was skipped
    if: always()
    steps:
      - run: echo "${{ needs.fabric-cpu.result }}"
      - name: failing...
        if: needs.fabric-cpu.result == 'failure'
        run: exit 1
      - name: cancelled or skipped...
        if: contains(fromJSON('["cancelled", "skipped"]'), needs.fabric-cpu.result)
        # The 90 s sleep outlasts the 1-minute step timeout, so this step cannot finish normally.
        # NOTE(review): presumably intentional, so the guardian does not pass when tests did not run - confirm
        timeout-minutes: 1
        run: sleep 90