Update test-g6-runners.yaml #10
name: Test - G6 Runner Autoscaling

on:
  workflow_dispatch:
  push:
    paths:
      - ".github/workflows/test-g6-runners.yaml"
jobs:
  test-1gpu-runner:
    runs-on: g6-1gpu-runner
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Job Info
        run: |
          echo "=== 1-GPU Runner Test ==="
          echo "Runner: $(hostname)"
          echo "Timestamp: $(date -u)"
      - name: Check GPU Info
        run: |
          echo "=== GPU Information ==="
          docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
            nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
      - name: Check Node Resources
        run: |
          echo "=== Node Information ==="
          echo "Hostname: $(hostname)"
          echo "CPU cores: $(nproc)"
          echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
      - name: Run GPU Test
        run: |
          docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
            bash -c "
              echo '=== 1-GPU Test ==='
              echo 'NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES'
              echo 'CUDA_VISIBLE_DEVICES: \$CUDA_VISIBLE_DEVICES'
              nvidia-smi
              echo ''
              echo '✅ 1-GPU test completed'
            "
  test-2gpu-runner:
    runs-on: g6-2gpu-runner
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Job Info
        run: |
          echo "=== 2-GPU Runner Test ==="
          echo "Runner: $(hostname)"
          echo "Timestamp: $(date -u)"
      - name: Check GPU Info
        run: |
          echo "=== GPU Information ==="
          docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
            nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
      - name: Check Node Resources
        run: |
          echo "=== Node Information ==="
          echo "Hostname: $(hostname)"
          echo "CPU cores: $(nproc)"
          echo "Memory: $(free -h | grep Mem | awk '{print $2}')"
      - name: Run GPU Test
        run: |
          docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
            bash -c "
              echo '=== 2-GPU Test ==='
              echo 'NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES'
              echo 'CUDA_VISIBLE_DEVICES: \$CUDA_VISIBLE_DEVICES'
              nvidia-smi
              echo ''
              echo '✅ 2-GPU test completed'
            "
  test-parallel-scaling:
    strategy:
      matrix:
        runner: [g6-1gpu-runner, g6-2gpu-runner]
        job_id: [1, 2, 3]
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Job Info
        run: |
          echo "=== Parallel Scaling Test ==="
          echo "Runner: ${{ matrix.runner }}"
          echo "Job ID: ${{ matrix.job_id }}"
          echo "Hostname: $(hostname)"
          echo "Timestamp: $(date -u)"
      - name: Check GPU Allocation
        run: |
          echo "=== GPU Allocation Check ==="
          docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
            nvidia-smi --query-gpu=index,name,memory.total,memory.used --format=csv
      - name: Run Parallel GPU Test
        run: |
          echo "Testing ${{ matrix.runner }} with GPU isolation"
          docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 \
            bash -c "
              echo '=== Parallel Test Job ${{ matrix.job_id }} ==='
              echo 'Runner: ${{ matrix.runner }}'
              echo 'NVIDIA_VISIBLE_DEVICES: \$NVIDIA_VISIBLE_DEVICES'
              nvidia-smi -L
              echo 'GPU Memory Info:'
              nvidia-smi --query-gpu=memory.total,memory.used --format=csv,noheader,nounits
              echo ''
              echo '✅ Parallel test completed for job ${{ matrix.job_id }}'
            "
      - name: Simulate Workload
        run: |
          echo "=== Simulating GPU Workload ==="
          sleep 30  # Simulate some work to test autoscaling behavior
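
      # A possible follow-up (sketch only, not part of this change): assert that the
      # container sees the GPU count implied by the runner label instead of only
      # printing nvidia-smi output. Assumes g6-1gpu-runner exposes 1 GPU and
      # g6-2gpu-runner exposes 2.
      #
      # - name: Assert GPU Count (sketch)
      #   run: |
      #     EXPECTED=$(echo "${{ matrix.runner }}" | grep -oE '[0-9]+gpu' | tr -d 'gpu')
      #     ACTUAL=$(docker run --rm --gpus all nvidia/cuda:12.2.0-runtime-ubuntu22.04 nvidia-smi -L | wc -l)
      #     echo "Expected $EXPECTED GPU(s), container sees $ACTUAL"
      #     test "$ACTUAL" -eq "$EXPECTED"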