Skip to content

Commit 5dc44d5

Browse files
Update test-g6-runners.yaml
1 parent 414227a commit 5dc44d5

File tree

1 file changed: +14 -37 lines changed

.github/workflows/test-g6-runners.yaml

Lines changed: 14 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@ jobs:
2222
- name: Check GPU Info
2323
run: |
2424
echo "=== GPU Information ==="
25-
nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
25+
docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
26+
nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
2627
2728
- name: Check Node Resources
2829
run: |
@@ -33,20 +34,12 @@ jobs:
3334
3435
- name: Run GPU Test
3536
run: |
36-
echo "NVIDIA_VISIBLE_DEVICES from K8s: ${NVIDIA_VISIBLE_DEVICES:-not set}"
37-
docker run --rm \
38-
-e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}" \
39-
nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
37+
docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
4038
bash -c "
4139
echo '=== 1-GPU Test ==='
4240
echo 'NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES'
4341
echo 'CUDA_VISIBLE_DEVICES: \$CUDA_VISIBLE_DEVICES'
44-
if command -v nvidia-smi &> /dev/null; then
45-
nvidia-smi
46-
else
47-
echo 'nvidia-smi not available, testing CUDA runtime...'
48-
echo 'GPU access: OK'
49-
fi
42+
nvidia-smi
5043
echo ''
5144
echo '✅ 1-GPU test completed'
5245
"
@@ -66,7 +59,8 @@ jobs:
6659
- name: Check GPU Info
6760
run: |
6861
echo "=== GPU Information ==="
69-
nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
62+
docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
63+
nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
7064
7165
- name: Check Node Resources
7266
run: |
@@ -77,21 +71,12 @@ jobs:
7771
7872
- name: Run GPU Test
7973
run: |
80-
echo "NVIDIA_VISIBLE_DEVICES from K8s: ${NVIDIA_VISIBLE_DEVICES:-not set}"
81-
# Use Kubernetes GPU allocation instead of --gpus=all
82-
docker run --rm \
83-
-e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}" \
84-
nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
74+
docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
8575
bash -c "
8676
echo '=== 2-GPU Test ==='
8777
echo 'NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES'
8878
echo 'CUDA_VISIBLE_DEVICES: \$CUDA_VISIBLE_DEVICES'
89-
if command -v nvidia-smi &> /dev/null; then
90-
nvidia-smi
91-
else
92-
echo 'nvidia-smi not available, testing CUDA runtime...'
93-
echo 'GPU access: OK'
94-
fi
79+
nvidia-smi
9580
echo ''
9681
echo '✅ 2-GPU test completed'
9782
"
@@ -117,29 +102,21 @@ jobs:
117102
- name: Check GPU Allocation
118103
run: |
119104
echo "=== GPU Allocation Check ==="
120-
echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
121-
nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
105+
docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
106+
nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
122107
123108
- name: Run Parallel GPU Test
124109
run: |
125-
echo "NVIDIA_VISIBLE_DEVICES from K8s: ${NVIDIA_VISIBLE_DEVICES:-not set}"
126110
echo "Testing ${{ matrix.runner }} with GPU isolation"
127111
128-
docker run --rm \
129-
-e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}" \
130-
nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
112+
docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
131113
bash -c "
132114
echo '=== Parallel Test Job ${{ matrix.job_id }} ==='
133115
echo 'Runner: ${{ matrix.runner }}'
134116
echo 'NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES'
135-
if command -v nvidia-smi &> /dev/null; then
136-
nvidia-smi -L
137-
echo 'GPU Memory Info:'
138-
nvidia-smi --query-gpu=memory.total,memory.used --format=csv,noheader,nounits
139-
else
140-
echo 'nvidia-smi not available, testing CUDA runtime...'
141-
echo 'GPU access: OK'
142-
fi
117+
nvidia-smi -L
118+
echo 'GPU Memory Info:'
119+
nvidia-smi --query-gpu=memory.total,memory.used --format=csv,noheader,nounits
143120
echo ''
144121
echo '✅ Parallel test completed for job ${{ matrix.job_id }}'
145122
"

0 commit comments

Comments (0)