Update test-native-gpu-runner.yaml #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Diagnostic workflow for the `g6-2gpu-native-runner` runner label.
# It only prints information (host, GPU, CUDA, node resources) and holds the
# runner briefly in two parallel matrix jobs; it builds and deploys nothing.
name: Test - Native GPU Runner

# Runs on manual dispatch, or on a push that modifies this workflow file.
on:
  workflow_dispatch:
  push:
    paths:
      - ".github/workflows/test-native-gpu-runner.yaml"

# Least privilege for GITHUB_TOKEN: the only repository access used by the
# jobs below is the read performed by the checkout step.
permissions:
  contents: read

jobs:
  # Single job that dumps the runner's GPU/CUDA/host details to the log.
  test-2gpu-native:
    # NOTE(review): presumably a self-hosted runner label with 2 GPUs, judging
    # by its name — confirm against the runner registration.
    runs-on: g6-2gpu-native-runner
    # Without an explicit limit a hung command would occupy the runner for the
    # default 360 minutes.
    timeout-minutes: 10
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Job Info
        run: |
          echo "=== Native GPU Runner Test ==="
          echo "Runner: $(hostname)"
          echo "Timestamp: $(date -u)"

      # Prints the device-visibility variables, or "not set" when they are absent.
      - name: Check GPU Environment
        run: |
          echo "=== GPU Environment ==="
          echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
          echo "CUDA_VISIBLE_DEVICES: ${CUDA_VISIBLE_DEVICES:-not set}"

      # No fallback here: a missing or failing nvidia-smi fails the step.
      - name: Check GPU Info
        run: |
          echo "=== GPU Information ==="
          nvidia-smi
          nvidia-smi -L
          nvidia-smi --query-gpu=index,name,uuid,memory.total,memory.used --format=csv

      # Best effort: both commands have fallbacks, so a missing CUDA toolkit
      # does not fail the step.
      - name: Check CUDA
        run: |
          echo "=== CUDA Info ==="
          nvcc --version || echo "nvcc not in PATH"
          ls -la /usr/local/cuda/bin/ || true

      - name: Check Node Resources
        run: |
          echo "=== Node Information ==="
          echo "Hostname: $(hostname)"
          echo "CPU cores: $(nproc)"
          echo "Memory: $(free -h | awk '/Mem/ {print $2}')"

  # Test parallel jobs to verify GPU isolation
  test-parallel-native:
    strategy:
      # Keep the sibling job running if one fails, so both GPU allocation
      # reports are available for comparison.
      fail-fast: false
      matrix:
        job_id: [1, 2]
    runs-on: g6-2gpu-native-runner
    # The workload step sleeps for 60 seconds; anything far beyond that is a hang.
    timeout-minutes: 10
    steps:
      - name: Job Info
        run: |
          echo "=== Parallel Native GPU Test ==="
          echo "Job ID: ${{ matrix.job_id }}"
          echo "Hostname: $(hostname)"
          echo "Start Time: $(date -u +%Y-%m-%dT%H:%M:%S)"

      # Lists the GPUs nvidia-smi reports inside this job. Comparing the UUIDs
      # across the two matrix jobs is left to whoever reads the logs.
      - name: Check GPU Allocation
        run: |
          echo "=== GPU Allocation Check for Job ${{ matrix.job_id }} ==="
          echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
          echo ""
          echo "GPU UUIDs assigned to this job:"
          nvidia-smi --query-gpu=index,uuid,name --format=csv
          echo ""
          echo "Full GPU Details:"
          nvidia-smi -L

      # Keeps the job alive for 60 seconds so the two matrix jobs can overlap
      # in time; no GPU work is actually launched.
      - name: Simulate Workload with GPU Lock
        run: |
          echo "=== Job ${{ matrix.job_id }} - Holding GPUs for 60 seconds ==="
          echo "GPUs held by this job:"
          nvidia-smi --query-gpu=uuid --format=csv,noheader
          echo ""
          echo "Starting workload at: $(date -u +%Y-%m-%dT%H:%M:%S)"
          sleep 60
          echo "Finished workload at: $(date -u +%Y-%m-%dT%H:%M:%S)"
          echo "✅ Job ${{ matrix.job_id }} completed"