Skip to content

Commit 239e437

Browse files
authored
Merge branch 'master' into prevent-cuda-init-bad-device-by-check-matmul-precision
2 parents c2f367c + 1120456 commit 239e437

26 files changed

Lines changed: 397 additions & 189 deletions

.github/checkgroup.yml

Lines changed: 90 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -21,62 +21,63 @@ subprojects:
2121
checks:
2222
- "pl-cpu-guardian" # aggregated check for all cases
2323

24-
- id: "pytorch_lightning: lit GPU"
25-
paths:
26-
- ".actions/*"
27-
- ".lightning/workflows/pytorch.yml"
28-
# only the azure GPU workflow runs the examples
29-
# all examples don't need to be added because they aren't used in CI, but these are
30-
- "examples/run_pl_examples.sh"
31-
- "examples/pytorch/basics/backbone_image_classifier.py"
32-
- "examples/pytorch/basics/autoencoder.py"
33-
- "requirements/pytorch/**"
34-
- "src/lightning/__init__.py"
35-
- "src/lightning/__setup__.py"
36-
- "src/lightning/__version__.py"
37-
- "src/lightning/pytorch/**"
38-
- "src/pytorch_lightning/*"
39-
- "tests/tests_pytorch/**"
40-
- "tests/run_standalone_*.sh"
41-
- "pyproject.toml" # includes pytest config
42-
- "requirements/fabric/**"
43-
- "src/lightning/fabric/**"
44-
- "src/lightning_fabric/*"
45-
- "!requirements/docs.txt"
46-
- "!requirements/*/docs.txt"
47-
- "!*.md"
48-
- "!**/*.md"
49-
checks:
50-
- "pytorch.yml / Lit Job (nvidia/cuda:12.1.1-devel-ubuntu22.04, pytorch, 3.10)"
51-
- "pytorch.yml / Lit Job (lightning, 3.12)"
52-
- "pytorch.yml / Lit Job (pytorch, 3.12)"
24+
# Temporarily disabled until LitBot is available again
25+
# - id: "pytorch_lightning: lit GPU"
26+
# paths:
27+
# - ".actions/*"
28+
# - ".lightning/workflows/pytorch.yml"
29+
# # only the azure GPU workflow runs the examples
30+
# # not every example needs to be listed, because most aren't used in CI, but these are
31+
# - "examples/run_pl_examples.sh"
32+
# - "examples/pytorch/basics/backbone_image_classifier.py"
33+
# - "examples/pytorch/basics/autoencoder.py"
34+
# - "requirements/pytorch/**"
35+
# - "src/lightning/__init__.py"
36+
# - "src/lightning/__setup__.py"
37+
# - "src/lightning/__version__.py"
38+
# - "src/lightning/pytorch/**"
39+
# - "src/pytorch_lightning/*"
40+
# - "tests/tests_pytorch/**"
41+
# - "tests/run_standalone_*.sh"
42+
# - "pyproject.toml" # includes pytest config
43+
# - "requirements/fabric/**"
44+
# - "src/lightning/fabric/**"
45+
# - "src/lightning_fabric/*"
46+
# - "!requirements/docs.txt"
47+
# - "!requirements/*/docs.txt"
48+
# - "!*.md"
49+
# - "!**/*.md"
50+
# checks:
51+
# - "pytorch.yml / Lit Job (nvidia/cuda:12.1.1-devel-ubuntu22.04, pytorch, 3.10)"
52+
# - "pytorch.yml / Lit Job (lightning, 3.12)"
53+
# - "pytorch.yml / Lit Job (pytorch, 3.12)"
5354

54-
- id: "Benchmarks"
55-
paths:
56-
- ".lightning/workflows/benchmark.yml"
57-
- "requirements/fabric/**"
58-
- "requirements/pytorch/**"
59-
- "src/lightning/fabric/**"
60-
- "src/lightning/pytorch/**"
61-
- "tests/parity_fabric/**"
62-
- "tests/parity_pytorch/**"
63-
- "!requirements/fabric/docs.txt"
64-
- "!requirements/pytorch/docs.txt"
65-
- "!*.md"
66-
- "!**/*.md"
67-
checks:
68-
- "benchmark.yml / Lit Job (fabric)"
69-
- "benchmark.yml / Lit Job (pytorch)"
70-
71-
# Temporarily disabled
72-
# - id: "pytorch-lightning: TPU workflow"
73-
# paths:
74-
# # tpu CI availability is very limited, so we only require tpu tests
75-
# # to pass when their configurations are modified
76-
# - ".github/workflows/tpu-tests.yml.disabled"
77-
# - "tests/tests_pytorch/run_tpu_tests.sh"
78-
# checks:
79-
# - "test-on-tpus (pytorch, pjrt, v4-8)"
55+
# - id: "Benchmarks"
56+
# paths:
57+
# - ".lightning/workflows/benchmark.yml"
58+
# - "requirements/fabric/**"
59+
# - "requirements/pytorch/**"
60+
# - "src/lightning/fabric/**"
61+
# - "src/lightning/pytorch/**"
62+
# - "tests/parity_fabric/**"
63+
# - "tests/parity_pytorch/**"
64+
# - "!requirements/fabric/docs.txt"
65+
# - "!requirements/pytorch/docs.txt"
66+
# - "!*.md"
67+
# - "!**/*.md"
68+
# checks:
69+
# - "benchmark.yml / Lit Job (fabric)"
70+
# - "benchmark.yml / Lit Job (pytorch)"
71+
#
72+
# # Temporarily disabled
73+
# # - id: "pytorch-lightning: TPU workflow"
74+
# # paths:
75+
# # # tpu CI availability is very limited, so we only require tpu tests
76+
# # # to pass when their configurations are modified
77+
# # - ".github/workflows/tpu-tests.yml.disabled"
78+
# # - "tests/tests_pytorch/run_tpu_tests.sh"
79+
# # checks:
80+
# # - "test-on-tpus (pytorch, pjrt, v4-8)"
8081

8182
- id: "fabric: Docs"
8283
paths:
@@ -129,40 +130,40 @@ subprojects:
129130
checks:
130131
- "fabric-cpu-guardian" # aggregated check for all cases
131132

132-
- id: "lightning_fabric: lit GPU"
133-
paths:
134-
- ".actions/*"
135-
- ".lightning/workflows/fabric.yml"
136-
- "examples/fabric/**"
137-
- "examples/run_fabric_examples.sh"
138-
- "requirements/fabric/**"
139-
- "src/lightning/__init__.py"
140-
- "src/lightning/__setup__.py"
141-
- "src/lightning/__version__.py"
142-
- "src/lightning/fabric/**"
143-
- "src/lightning_fabric/*"
144-
- "tests/tests_fabric/**"
145-
- "tests/run_standalone_*.sh"
146-
- "pyproject.toml" # includes pytest config
147-
- "!requirements/*/docs.txt"
148-
- "!*.md"
149-
- "!**/*.md"
150-
checks:
151-
- "fabric.yml / Lit Job (nvidia/cuda:12.1.1-devel-ubuntu22.04, fabric, 3.10)"
152-
- "fabric.yml / Lit Job (fabric, 3.12)"
153-
- "fabric.yml / Lit Job (lightning, 3.12)"
154-
155-
# Temporarily disabled
156-
# - id: "lightning_fabric: TPU workflow"
157-
# paths:
158-
# # tpu CI availability is very limited, so we only require tpu tests
159-
# # to pass when their configurations are modified
160-
# - ".github/workflows/tpu-tests.yml.disabled"
161-
# - "tests/tests_fabric/run_tpu_tests.sh"
162-
# checks:
163-
# - "test-on-tpus (pytorch, pjrt, v4-8)"
164-
165-
# SECTION: common
133+
# - id: "lightning_fabric: lit GPU"
134+
# paths:
135+
# - ".actions/*"
136+
# - ".lightning/workflows/fabric.yml"
137+
# - "examples/fabric/**"
138+
# - "examples/run_fabric_examples.sh"
139+
# - "requirements/fabric/**"
140+
# - "src/lightning/__init__.py"
141+
# - "src/lightning/__setup__.py"
142+
# - "src/lightning/__version__.py"
143+
# - "src/lightning/fabric/**"
144+
# - "src/lightning_fabric/*"
145+
# - "tests/tests_fabric/**"
146+
# - "tests/run_standalone_*.sh"
147+
# - "pyproject.toml" # includes pytest config
148+
# - "!requirements/*/docs.txt"
149+
# - "!*.md"
150+
# - "!**/*.md"
151+
# checks:
152+
# - "fabric.yml / Lit Job (nvidia/cuda:12.1.1-devel-ubuntu22.04, fabric, 3.10)"
153+
# - "fabric.yml / Lit Job (fabric, 3.12)"
154+
# - "fabric.yml / Lit Job (lightning, 3.12)"
155+
#
156+
# # Temporarily disabled
157+
# # - id: "lightning_fabric: TPU workflow"
158+
# # paths:
159+
# # # tpu CI availability is very limited, so we only require tpu tests
160+
# # # to pass when their configurations are modified
161+
# # - ".github/workflows/tpu-tests.yml.disabled"
162+
# # - "tests/tests_fabric/run_tpu_tests.sh"
163+
# # checks:
164+
# # - "test-on-tpus (pytorch, pjrt, v4-8)"
165+
#
166+
# SECTION: common
166167

167168
- id: "mypy"
168169
paths:

.github/workflows/_build-packages.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ jobs:
2525
matrix:
2626
pkg-name: ${{ fromJSON(inputs.pkg-names) }}
2727
steps:
28-
- uses: actions/checkout@v6
29-
- uses: actions/setup-python@v6
28+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
29+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
3030
with:
3131
python-version: "3.x"
3232

@@ -41,7 +41,7 @@ jobs:
4141
mkdir -p pypi/${{ matrix.pkg-name }}
4242
cp dist/* pypi/${{ matrix.pkg-name }}/
4343
44-
- uses: actions/upload-artifact@v7
44+
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
4545
with:
4646
name: ${{ inputs.artifact-name }}-${{ matrix.pkg-name }}
4747
path: pypi
@@ -51,7 +51,7 @@ jobs:
5151
needs: build-packages
5252
runs-on: ubuntu-22.04
5353
steps:
54-
- uses: actions/download-artifact@v8
54+
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
5555
with: # download all build artifacts
5656
pattern: ${{ inputs.artifact-name }}-*
5757
merge-multiple: true
@@ -62,7 +62,7 @@ jobs:
6262
6363
- name: Keep artifact
6464
run: python -c "print('DAYS=' + str(5 if '${{ github.event_name }}'.startswith('pull_request') else 0))" >> $GITHUB_ENV
65-
- uses: actions/upload-artifact@v7
65+
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
6666
with:
6767
name: ${{ inputs.artifact-name }}
6868
path: pypi

.github/workflows/_legacy-checkpoints.yml

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ on:
4444
env:
4545
LEGACY_FOLDER: "tests/legacy"
4646
TORCH_URL: "https://download.pytorch.org/whl/cpu/"
47+
# Supply-chain guard: skip PyPI releases uploaded within this window (a duration counted back from now). See https://docs.astral.sh/uv/reference/settings/#exclude-newer
48+
UV_EXCLUDE_NEWER: "2 days"
4749

4850
defaults:
4951
run:
@@ -55,10 +57,10 @@ jobs:
5557
outputs:
5658
pl-version: ${{ steps.decide-version.outputs.pl-version }}
5759
steps:
58-
- uses: actions/checkout@v6
60+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
5961

6062
- name: Install uv and set Python version
61-
uses: astral-sh/setup-uv@v7
63+
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
6264
with:
6365
python-version: "3.10"
6466
# TODO: Avoid activating environment like this
@@ -71,12 +73,15 @@ jobs:
7173
PACKAGE_NAME: pytorch
7274
FREEZE_REQUIREMENTS: 1
7375
timeout-minutes: 20
74-
run: uv pip install . --extra-index-url="${TORCH_URL}"
76+
# `unsafe-best-match` required: PyTorch extra-index mirrors an older fsspec that
77+
# conflicts with lightning's version range under UV_EXCLUDE_NEWER.
78+
run: uv pip install . --extra-index-url="${TORCH_URL}" --index-strategy unsafe-best-match
7579
if: inputs.pl_version == ''
7680

7781
- name: Install PL version
7882
timeout-minutes: 20
79-
run: uv pip install "pytorch-lightning==${{ inputs.pl_version }}" --extra-index-url="${TORCH_URL}"
83+
# `unsafe-best-match` required: same fsspec/PyTorch-index conflict as above.
84+
run: uv pip install "pytorch-lightning==${{ inputs.pl_version }}" --extra-index-url="${TORCH_URL}" --index-strategy unsafe-best-match
8085
if: inputs.pl_version != ''
8186

8287
- name: Adjust tests -> PL
@@ -108,7 +113,7 @@ jobs:
108113
python -c "print('AWS_RUN=' + str('' if '${{inputs.push_to_s3}}' == 'true' else '--dryrun'))" >> $GITHUB_ENV
109114
110115
- name: Upload checkpoints to GitHub Actions artifact
111-
uses: actions/upload-artifact@v7
116+
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
112117
with:
113118
name: checkpoints-${{ github.sha }}
114119
path: ${{ env.LEGACY_FOLDER }}/checkpoints/
@@ -139,7 +144,7 @@ jobs:
139144
env:
140145
PL_VERSION: ${{ needs.create-legacy-ckpts.outputs.pl-version }}
141146
steps:
142-
- uses: actions/checkout@v6
147+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
143148
with:
144149
ref: master
145150

@@ -148,7 +153,7 @@ jobs:
148153
run: echo ${PL_VERSION} >> back-compatible-versions.txt
149154

150155
- name: Create Pull Request
151-
uses: peter-evans/create-pull-request@v8
156+
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
152157
with:
153158
title: Adding test for legacy checkpoint created with ${{ env.PL_VERSION }}
154159
committer: GitHub <noreply@github.com>

.github/workflows/ci-check-md-links.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ on:
1414

1515
jobs:
1616
check-md-links:
17-
uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@main # can be pin with >=0.14.4
17+
uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@86fe1b20b4609835ba9e8c8739cd39707ba76868 # v0.15.3
1818
with:
1919
config-file: ".github/markdown-links-config.json"
2020
base-branch: "master"

.github/workflows/ci-pkg-install.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,17 @@ jobs:
4545
os: ["ubuntu-22.04", "macOS-14", "windows-2022"]
4646
pkg-name: ["fabric", "pytorch", "lightning", "notset"]
4747
python-version: ["3.10", "3.11"]
48+
env:
49+
# Supply-chain guard: skip PyPI releases uploaded within this window (ISO 8601 duration counted back from now). See https://pip.pypa.io/en/stable/cli/pip_install/#cmdoption-uploaded-prior-to
50+
PIP_UPLOADED_PRIOR_TO: "P2D"
4851
steps:
49-
- uses: actions/checkout@v6
50-
- uses: actions/setup-python@v6
52+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
53+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
5154
with:
5255
python-version: ${{ matrix.python-version }}
53-
- uses: actions/download-artifact@v8
56+
- name: Upgrade pip (for --uploaded-prior-to, pip >= 26.1)
57+
run: python -m pip install --upgrade "pip>=26.1"
58+
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
5459
with:
5560
name: dist-packages-${{ github.sha }}
5661
path: dist

.github/workflows/ci-schema.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ on:
88

99
jobs:
1010
check:
11-
uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.15.3
11+
uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@86fe1b20b4609835ba9e8c8739cd39707ba76868 # v0.15.3
1212
with:
1313
# skip azure due to the wrong schema file by MSFT
1414
# https://github.com/Lightning-AI/lightning-flash/pull/1455#issuecomment-1244793607

.github/workflows/ci-tests-fabric.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,13 @@ jobs:
6868
# TODO: Remove this - Enable running MPS tests on this platform
6969
DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
7070
UV_TORCH_BACKEND: "cpu"
71+
# Supply-chain guard: skip PyPI releases uploaded within this window (a duration counted back from now). See https://docs.astral.sh/uv/reference/settings/#exclude-newer
72+
UV_EXCLUDE_NEWER: "2 days"
7173
steps:
72-
- uses: actions/checkout@v6
74+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
7375

7476
- name: Install uv and set Python version
75-
uses: astral-sh/setup-uv@v7
77+
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
7678
with:
7779
python-version: ${{ matrix.config.python-version || '3.10' }}
7880
# TODO: Avoid activating environment like this
@@ -165,7 +167,7 @@ jobs:
165167
coverage xml
166168
167169
- name: Upload coverage to Codecov
168-
uses: codecov/codecov-action@v6
170+
uses: codecov/codecov-action@e79a6962e0d4c0c17b229090214935d2e33f8354 # v6.0.1
169171
# see: https://github.com/actions/toolkit/issues/399
170172
continue-on-error: true
171173
with:

0 commit comments

Comments
 (0)