Skip to content

Commit bb86261

Browse files
Create test-k8s-mode-runner.yaml
1 parent d5f5c5b commit bb86261

File tree

1 file changed

+74
-0
lines changed

1 file changed

+74
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,74 @@
1+
# Smoke test for the K8s-mode GPU runner.
#
# Two jobs, both scheduled on the `g6-2gpu-k8s-runner` label:
#   * test-2gpu-k8s-runner - checks out the repo and prints GPU / node facts.
#   * test-parallel-k8s    - runs two matrix legs side by side and prints the
#                            GPUs each leg can see, so the logs can be compared.
#
# The jobs only print diagnostics; they do not assert on GPU count or identity.
# NOTE(review): `g6-2gpu-k8s-runner` is presumably a self-hosted runner label;
# confirm it matches the deployed runner configuration.
name: Test - K8s Mode GPU Runner

on:
  workflow_dispatch:
  # Re-run automatically whenever this workflow file itself changes.
  push:
    paths:
      - ".github/workflows/test-k8s-mode-runner.yaml"

# Least privilege: the only repository access used below is the checkout.
permissions:
  contents: read

jobs:
  test-2gpu-k8s-runner:
    runs-on: g6-2gpu-k8s-runner
    # Diagnostics finish in seconds; cap the run so a stuck runner does not
    # hold the job for the 360-minute default.
    timeout-minutes: 10
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Job Info
        run: |
          echo "=== K8s Mode 2-GPU Runner Test ==="
          echo "Runner: $(hostname)"
          echo "Timestamp: $(date -u)"

      # Prints each variable, or "not set" when it is unset or empty.
      - name: Check GPU Environment
        run: |
          echo "=== GPU Environment ==="
          echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
          echo "CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"

      # Best effort: a missing nvidia-smi is reported, not treated as failure.
      - name: Check GPU Info
        run: |
          echo "=== GPU Information ==="
          # POSIX redirection (not bash-only `&>`), so the check also behaves
          # correctly if the step runs under `sh` instead of bash.
          if command -v nvidia-smi > /dev/null 2>&1; then
            nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
            nvidia-smi -L
          else
            echo "nvidia-smi not available in this container"
          fi

      - name: Check Node Resources
        run: |
          echo "=== Node Information ==="
          echo "Hostname: $(hostname)"
          echo "CPU cores: $(nproc)"
          # Second column of the "Mem" row of `free -h`.
          echo "Memory: $(free -h | awk '/Mem/ {print $2}')"

  # Test parallel jobs to verify GPU isolation
  test-parallel-k8s:
    strategy:
      # Let both legs run to completion even if one fails; otherwise the
      # surviving leg is cancelled and its GPU listing is lost.
      fail-fast: false
      matrix:
        job_id: [1, 2]
    runs-on: g6-2gpu-k8s-runner
    timeout-minutes: 10
    steps:
      - name: Job Info
        run: |
          echo "=== Parallel K8s Mode Test ==="
          echo "Job ID: ${{ matrix.job_id }}"
          echo "Hostname: $(hostname)"
          echo "Timestamp: $(date -u)"

      # Lists the GPUs (with UUIDs) visible to this leg so the two legs'
      # logs can be compared by hand.
      - name: Check GPU Allocation
        run: |
          echo "=== GPU Allocation Check ==="
          echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
          if command -v nvidia-smi > /dev/null 2>&1; then
            nvidia-smi -L
            nvidia-smi --query-gpu=index,uuid --format=csv
          else
            echo "nvidia-smi not available"
          fi

      # Keeps the leg alive for 30 s; no GPU work is actually launched.
      # NOTE(review): presumably this is to make the two legs overlap in time.
      - name: Simulate Workload
        run: |
          echo "=== Simulating GPU Workload ==="
          sleep 30
          echo "✅ Job ${{ matrix.job_id }} completed"

0 commit comments

Comments
 (0)