Update test-g6-runners.yaml #6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Smoke test for the self-hosted G6 GPU runner labels and their autoscaling.
#
# Runs on manual dispatch and on pushes that modify this workflow file.
#   * test-1gpu-runner      - one job on the `g6-1gpu-runner` label
#   * test-2gpu-runner      - one job on the `g6-2gpu-runner` label
#   * test-parallel-scaling - 2 runner labels x 3 job ids = 6 matrix jobs
#
# Every job prints host and GPU details, then runs `nvidia-smi` inside a CUDA
# base container to confirm the GPUs are reachable through Docker.
name: Test - G6 Runner Autoscaling

on:
  workflow_dispatch:
  push:
    paths:
      - ".github/workflows/test-g6-runners.yaml"

# The jobs only check out the repository, so a read-only token is sufficient.
permissions:
  contents: read

# An explicit `shell: bash` makes GitHub run each script with `-eo pipefail`,
# so a failure inside a pipeline fails the step, and guarantees that the
# `[[ ... ]]` test used below is available.
defaults:
  run:
    shell: bash

jobs:
  test-1gpu-runner:
    runs-on: g6-1gpu-runner
    # Fail a hung job quickly instead of holding a GPU runner for the
    # 360-minute default. NOTE(review): 15 minutes is an estimate; adjust if
    # image pulls on a freshly scaled node take longer.
    timeout-minutes: 15
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Job Info
        run: |
          echo "=== 1-GPU Runner Test ==="
          echo "Runner: $(hostname)"
          echo "Timestamp: $(date -u)"

      - name: Check GPU Info
        run: |
          echo "=== GPU Information ==="
          nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv

      - name: Check Node Resources
        run: |
          echo "=== Node Information ==="
          echo "Hostname: $(hostname)"
          echo "CPU cores: $(nproc)"
          echo "Memory: $(free -h | awk '/^Mem:/ {print $2}')"

      - name: Run GPU Test
        run: |
          # Use the devices assigned to this runner, falling back to GPU 0.
          gpu_devices="${NVIDIA_VISIBLE_DEVICES:-0}"
          # Docker parses the --gpus value as CSV, so the device list is
          # wrapped in literal double quotes to survive a comma-separated
          # value in NVIDIA_VISIBLE_DEVICES.
          docker run --rm --gpus "\"device=${gpu_devices}\"" \
            -e CUDA_VISIBLE_DEVICES=0 \
            nvidia/cuda:12.2.0-base-ubuntu22.04 \
            bash -c "
              echo '=== 1-GPU Test ==='
              nvidia-smi
              echo ''
              echo '✅ 1-GPU test completed'
            "

  test-2gpu-runner:
    runs-on: g6-2gpu-runner
    # See the note on test-1gpu-runner: bounds a hung job well below the
    # 360-minute default. NOTE(review): value is an estimate.
    timeout-minutes: 15
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Job Info
        run: |
          echo "=== 2-GPU Runner Test ==="
          echo "Runner: $(hostname)"
          echo "Timestamp: $(date -u)"

      - name: Check GPU Info
        run: |
          echo "=== GPU Information ==="
          nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv

      - name: Check Node Resources
        run: |
          echo "=== Node Information ==="
          echo "Hostname: $(hostname)"
          echo "CPU cores: $(nproc)"
          echo "Memory: $(free -h | awk '/^Mem:/ {print $2}')"

      - name: Run GPU Test
        run: |
          # Use the devices assigned to this runner, falling back to GPUs 0,1.
          gpu_devices="${NVIDIA_VISIBLE_DEVICES:-0,1}"
          # Docker parses the --gpus value as CSV: an unquoted `device=0,1`
          # is split at the comma and rejected, so the list must reach Docker
          # wrapped in literal double quotes.
          docker run --rm --gpus "\"device=${gpu_devices}\"" \
            -e CUDA_VISIBLE_DEVICES=0,1 \
            nvidia/cuda:12.2.0-base-ubuntu22.04 \
            bash -c "
              echo '=== 2-GPU Test ==='
              nvidia-smi
              echo ''
              echo '✅ 2-GPU test completed'
            "

  test-parallel-scaling:
    strategy:
      # Let every matrix job run to completion; with the default
      # (fail-fast: true) a single failure cancels the remaining jobs and
      # hides how the other runners behaved.
      fail-fast: false
      matrix:
        runner: [g6-1gpu-runner, g6-2gpu-runner]
        job_id: [1, 2, 3]
    runs-on: ${{ matrix.runner }}
    # Bounds a hung job well below the 360-minute default.
    # NOTE(review): value is an estimate.
    timeout-minutes: 15
    # Matrix values are handed to the scripts as environment variables rather
    # than being expanded into the script text by the workflow engine.
    env:
      MATRIX_RUNNER: ${{ matrix.runner }}
      MATRIX_JOB_ID: ${{ matrix.job_id }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Job Info
        run: |
          echo "=== Parallel Scaling Test ==="
          echo "Runner: ${MATRIX_RUNNER}"
          echo "Job ID: ${MATRIX_JOB_ID}"
          echo "Hostname: $(hostname)"
          echo "Timestamp: $(date -u)"

      - name: Check GPU Allocation
        run: |
          echo "=== GPU Allocation Check ==="
          echo "NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-not set}"
          nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv

      - name: Run Parallel GPU Test
        run: |
          # Pick the fallback device list that matches the runner label.
          if [[ "${MATRIX_RUNNER}" == "g6-1gpu-runner" ]]; then
            gpu_devices="${NVIDIA_VISIBLE_DEVICES:-0}"
            echo "Testing 1-GPU runner with device: $gpu_devices"
          else
            gpu_devices="${NVIDIA_VISIBLE_DEVICES:-0,1}"
            echo "Testing 2-GPU runner with devices: $gpu_devices"
          fi
          # Literal inner quotes keep a comma-separated device list intact
          # when Docker parses the --gpus value as CSV.
          # The MATRIX_* variables are expanded by this outer shell before
          # the script string is passed to the container.
          docker run --rm --gpus "\"device=${gpu_devices}\"" \
            nvidia/cuda:12.2.0-base-ubuntu22.04 \
            bash -c "
              echo '=== Parallel Test Job ${MATRIX_JOB_ID} ==='
              echo 'Runner: ${MATRIX_RUNNER}'
              nvidia-smi -L
              echo ''
              echo '✅ Parallel test completed for job ${MATRIX_JOB_ID}'
            "

      - name: Simulate Workload
        run: |
          echo "=== Simulating GPU Workload ==="
          sleep 30  # Simulate some work to test autoscaling behavior