Skip to content

Commit 505b741

Browse files
authored
Remove V100 from test environment (#1238)
Support for Volta will be dropped in future CUDA releases.
1 parent 34066f5 commit 505b741

File tree

3 files changed

+19
-29
lines changed

3 files changed

+19
-29
lines changed

.github/workflows/_test_rosetta.yaml

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ on:
88
description: 'Rosetta image build by NVIDIA/JAX-Toolbox'
99
required: true
1010
default: 'ghcr.io/nvidia/t5x:latest'
11+
TIMEOUT_MINUTES:
12+
type: number
13+
description: 'Maximum test runtime, in minutes'
14+
default: 60
1115
outputs:
1216
TEST_ARTIFACT_NAME:
1317
description: 'Name of the unit test artifact for downstream workflows'
@@ -21,8 +25,19 @@ env:
2125
TEST_LOG_LOCAL_PATH: /log/unit-report.jsonl
2226

2327
jobs:
28+
runner:
29+
uses: ./.github/workflows/_runner_ondemand_slurm.yaml
30+
with:
31+
NAME: "A100"
32+
LABELS: "A100,${{ github.run_id }}"
33+
TIME: "${{ inputs.TIMEOUT_MINUTES }}:00"
34+
secrets: inherit
35+
2436
rosetta-unit-tests:
25-
runs-on: [self-hosted, V100]
37+
runs-on:
38+
- self-hosted
39+
- A100
40+
- "${{ github.run_id }}"
2641
outputs:
2742
TEST_ARTIFACT_NAME: ${{ env.TEST_ARTIFACT_NAME }}
2843
steps:
@@ -92,6 +107,6 @@ jobs:
92107
BADGE_COLOR=yellow
93108
fi
94109
fi
95-
echo "LABEL='V100 Unit'" >> $GITHUB_OUTPUT
110+
echo "LABEL='A100 Unit'" >> $GITHUB_OUTPUT
96111
echo "MESSAGE='${PASSED_TESTS}/${SKIPPED_TESTS}/${FAILED_TESTS} pass/skip/fail'" >> $GITHUB_OUTPUT
97-
echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT
112+
echo "COLOR='${BADGE_COLOR}'" >> $GITHUB_OUTPUT

.github/workflows/_test_unit.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
strategy:
3838
fail-fast: false
3939
matrix:
40-
GPU_ARCH: [V100, A100]
40+
GPU_ARCH: [A100]
4141
include:
4242
- EXTRA_LABEL: "self-hosted"
4343
# ensures A100 job lands on dedicated runner for this particular job

README.md

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,11 @@ We support and test the following JAX frameworks and model architectures. More d
7171
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae/#file-final-jax-md"><img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-jax-build-arm64.json&logo=docker&label=arm64"></a>
7272
</td>
7373
<td>
74-
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-jax-unit-test-v100-json">
75-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-jax-unit-test-V100.json&logo=nvidia&label=V100">
76-
</a>
7774
<br>
7875
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-jax-unit-test-a100-json">
7976
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-jax-unit-test-A100.json&logo=nvidia&label=A100">
8077
</a>
8178
<br>
82-
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-te-unit-test-v100-json">
83-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-te-unit-test-V100.json&logo=nvidia&label=TE%20V100">
84-
</a>
85-
<br>
8679
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-te-unit-test-a100-json">
8780
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-te-unit-test-A100.json&logo=nvidia&label=TE%20A100">
8881
</a>
@@ -91,10 +84,6 @@ We support and test the following JAX frameworks and model architectures. More d
9184
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-te-multigpu-test.json&logo=nvidia&label=TE%20Multi%20GPU">
9285
</a>
9386
<br>
94-
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-nsys-jax-unit-test-v100-json">
95-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-nsys-jax-unit-test-V100.json&logo=nvidia&label=nsys-jax V100">
96-
</a>
97-
<br>
9887
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-nsys-jax-unit-test-a100-json">
9988
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-nsys-jax-unit-test-A100.json&logo=nvidia&label=nsys-jax A100">
10089
</a>
@@ -119,10 +108,6 @@ We support and test the following JAX frameworks and model architectures. More d
119108
</a>
120109
</td>
121110
<td>
122-
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-levanter-unit-test-v100-json">
123-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-levanter-unit-test-V100.json&logo=nvidia&label=V100">
124-
</a>
125-
<br>
126111
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-levanter-unit-test-a100-json">
127112
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-levanter-unit-test-A100.json&logo=nvidia&label=A100">
128113
</a>
@@ -148,8 +133,6 @@ We support and test the following JAX frameworks and model architectures. More d
148133
</td>
149134
<td>
150135
[tests disabled]
151-
<!--<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-equinox-unit-test-V100.json&logo=nvidia&label=V100">
152-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-equinox-unit-test-A100.json&logo=nvidia&label=A100">-->
153136
</td>
154137
</tr>
155138
<tr>
@@ -168,10 +151,6 @@ We support and test the following JAX frameworks and model architectures. More d
168151
<!-- <img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-triton-build-arm64.json&logo=docker&label=arm64"> -->
169152
</td>
170153
<td>
171-
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-triton-unit-test-v100-json">
172-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-triton-unit-test-V100.json&logo=nvidia&label=JAX-Triton V100">
173-
</a>
174-
<br>
175154
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-triton-unit-test-a100-json">
176155
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-triton-unit-test-A100.json&logo=nvidia&label=JAX-Triton A100">
177156
</a>
@@ -313,10 +292,6 @@ We support and test the following JAX frameworks and model architectures. More d
313292
<!-- <a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae/#file-final-gemma-md"><img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-gemma-build-arm64.json&logo=docker&label=arm64"></a> -->
314293
</td>
315294
<td>
316-
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-gemma-unit-test-v100-json">
317-
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-gemma-unit-test-V100.json&logo=nvidia&label=V100">
318-
</a>
319-
<br>
320295
<a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-gemma-unit-test-a100-json">
321296
<img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-gemma-unit-test-A100.json&logo=nvidia&label=A100">
322297
</a>

0 commit comments

Comments
 (0)