@@ -12,31 +12,30 @@ jobs:
1212 steps :
1313 - name : Checkout
1414 uses : actions/checkout@v4
15-
15+
1616 - name : Job Info
1717 run : |
1818 echo "=== 1-GPU Runner Test ==="
1919 echo "Runner: $(hostname)"
2020 echo "Timestamp: $(date -u)"
21-
21+
2222 - name : Check GPU Info
2323 run : |
2424 echo "=== GPU Information ==="
2525 nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
26-
26+
2727 - name : Check Node Resources
2828 run : |
2929 echo "=== Node Information ==="
3030 echo "Hostname: $(hostname)"
3131 echo "CPU cores: $(nproc)"
3232 echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
33-
33+
3434 - name : Run GPU Test
3535 run : |
3636 echo "NVIDIA_VISIBLE_DEVICES from K8s: ${NVIDIA_VISIBLE_DEVICES:-not set}"
3737 docker run --rm \
3838 -e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}" \
39- --runtime=nvidia \
4039 nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
4140 bash -c "
4241 echo '=== 1-GPU Test ==='
@@ -57,32 +56,31 @@ jobs:
5756 steps :
5857 - name : Checkout
5958 uses : actions/checkout@v4
60-
59+
6160 - name : Job Info
6261 run : |
6362 echo "=== 2-GPU Runner Test ==="
6463 echo "Runner: $(hostname)"
6564 echo "Timestamp: $(date -u)"
66-
65+
6766 - name : Check GPU Info
6867 run : |
6968 echo "=== GPU Information ==="
7069 nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
71-
70+
7271 - name : Check Node Resources
7372 run : |
7473 echo "=== Node Information ==="
7574 echo "Hostname: $(hostname)"
7675 echo "CPU cores: $(nproc)"
7776 echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
78-
77+
7978 - name : Run GPU Test
8079 run : |
8180 echo "NVIDIA_VISIBLE_DEVICES from K8s: ${NVIDIA_VISIBLE_DEVICES:-not set}"
8281 # Use Kubernetes GPU allocation instead of --gpus=all
8382 docker run --rm \
8483 -e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}" \
85- --runtime=nvidia \
8684 nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
8785 bash -c "
8886 echo '=== 2-GPU Test ==='
@@ -107,29 +105,28 @@ jobs:
107105 steps :
108106 - name : Checkout
109107 uses : actions/checkout@v4
110-
108+
111109 - name : Job Info
112110 run : |
113111 echo "=== Parallel Scaling Test ==="
114112 echo "Runner: ${{ matrix.runner }}"
115113 echo "Job ID: ${{ matrix.job_id }}"
116114 echo "Hostname: $(hostname)"
117115 echo "Timestamp: $(date -u)"
118-
116+
119117 - name : Check GPU Allocation
120118 run : |
121119 echo "=== GPU Allocation Check ==="
122120 echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
123121 nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
124-
122+
125123 - name : Run Parallel GPU Test
126124 run : |
127125 echo "NVIDIA_VISIBLE_DEVICES from K8s: ${NVIDIA_VISIBLE_DEVICES:-not set}"
128126 echo "Testing ${{ matrix.runner }} with GPU isolation"
129127
130128 docker run --rm \
131129 -e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}" \
132- --runtime=nvidia \
133130 nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
134131 bash -c "
135132 echo '=== Parallel Test Job ${{ matrix.job_id }} ==='
@@ -146,7 +143,7 @@ jobs:
146143 echo ''
147144 echo '✅ Parallel test completed for job ${{ matrix.job_id }}'
148145 "
149-
146+
150147 - name : Simulate Workload
151148 run : |
152149 echo "=== Simulating GPU Workload ==="
0 commit comments