CI - Boo on RDNA4 #187
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Copyright 2025 Advanced Micro Devices, Inc. | |
| # | |
| # Licensed under the Apache License v2.0 with LLVM Exceptions. | |
| # See https://llvm.org/LICENSE.txt for license information. | |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
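| # This CI runs the following suites (all of them on the nightly schedule, | |
| # individually selectable through the workflow_dispatch inputs): | |
| # | |
| # 1. gemm shapes with hipblaslt-bench | |
| # 2. prod conv with untuned boo and native pytorch | |
| # 3. proxy with untuned boo (native pytorch backend temporarily disabled) | |
| # 4. gemms with untuned boo and native pytorch | |
| # 5. batch norm with untuned boo, native pytorch and inductor | |
| # 6. attention shapes with untuned boo and inductor | |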
# Display name of the workflow.
name: CI - Boo on RDNA4

# Triggers: manual dispatch with one toggle per benchmark suite, plus a
# nightly cron run.
on:
  workflow_dispatch:
    inputs:
      # Runner label the benchmark job is scheduled on.
      runner:
        description: "Runner to use for the job"
        required: false
        default: "shark-mkm-4-boo"
        type: choice
        options:
          - shark-mkm-4-boo
          - shark-mkm-1-for-boo
          - shark41-rdna4
      # Each boolean below gates the matching "Run ..." step of the
      # benchmark job; all default to off for manual runs.
      run_prod_conv:
        description: "Run Prod Conv"
        type: boolean
        default: false
      run_all_proxy:
        description: "Run All Proxy"
        type: boolean
        default: false
      run_gemm:
        description: "Run Gemm"
        type: boolean
        default: false
      run_hipblaslt:
        description: "Run hipblaslt gemm"
        type: boolean
        default: false
      run_batch_norm:
        description: "Run Batch Norm"
        type: boolean
        default: false
      run_attention:
        description: "Run Attention"
        type: boolean
        default: false
  schedule:
    # Once a day at minute 36 of hour 5 (GitHub evaluates cron in UTC).
    - cron: "36 5 * * *"
# Least privilege for the default GITHUB_TOKEN. No step in this workflow
# writes to this repository: reports are pushed to
# nod-ai/amd-shark-ai-reports with the AMD_SHARK_AI_GITHUB_TOKEN secret, so
# read access (needed by actions/checkout) is sufficient.
permissions:
  contents: read
concurrency:
  # The event name is part of the group key, so scheduled and manually
  # dispatched runs end up in different groups and cannot cancel each other;
  # in particular a manual workflow_dispatch never interrupts a scheduled run.
  # Within one group, a newer run cancels the run that is still in progress.
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
  # Runs the selected BOO benchmark suites on an RDNA4 runner and uploads the
  # resulting CSV/log files as the "rdna-boo-logs" artifact.
  test_boo_rdna4:
    # Scheduled runs only execute under the nod-ai owner; any other trigger
    # is allowed everywhere.
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
    timeout-minutes: 600
    name: "Boo on RDNA 4"
    strategy:
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would be
        # loaded as the float 3.1).
        version: ["3.12"]
      fail-fast: false
    runs-on: ${{ github.event.inputs.runner || 'shark-mkm-4-boo' }}
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      OFFLINE_SERVING: DISABLED
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
        with:
          python-version: ${{ matrix.version }}
      # - name: Print System Information
      #   run: |
      #     echo "============ CPU Information ============"
      #     lscpu
      #     echo ""
      #     echo "============ GPU Information ============"
      #     rocm-smi || true
      #     rocminfo || true
      #     echo ""
      #     echo "============ ROCM Version ============"
      #     cat /opt/rocm/.info/version || echo "ROCM version file not found"
      - name: Create Python venv
        run: |
          python -m venv "${VENV_DIR}"
          source "${VENV_DIR}/bin/activate"
          # Every step runs in a fresh shell, so the venv's bin directory is
          # added to PATH for all following steps.
          echo "${VENV_DIR}/bin" >> "$GITHUB_PATH"
      - name: Install pip deps
        run: |
          mkdir -p output_artifacts
          python -m pip install \
            --index-url https://rocm.nightlies.amd.com/v2/gfx120X-all/ \
            -r .github/workflows/ci_boo_torch_pin.txt
          pip install --no-index --find-links https://iree.dev/pip-release-links.html --pre --no-deps iree-base-compiler iree-base-runtime
          pip install "git+https://github.com/iree-org/iree-turbine.git"
          # Record the exact package set; uploaded with the logs.
          pip freeze > "$(pwd)/output_artifacts/version.txt"
      - name: Download config files
        env:
          # Handed over through the environment instead of being templated
          # into the script text.
          REPORTS_TOKEN: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
        run: |
          echo "Getting config files"
          git clone --filter=blob:none --no-checkout "https://x-access-token:${REPORTS_TOKEN}@github.com/nod-ai/amd-shark-ai-reports.git"
          cd amd-shark-ai-reports
          git sparse-checkout init --cone
          # Every file the benchmark steps read is listed explicitly,
          # including attention_shapes.txt used by "Run Attention".
          git sparse-checkout set \
            boo/all_proxy_config.txt \
            boo/prod_conv_config.txt \
            boo/gemm_config.txt \
            boo/hipblaslt-bench.sh \
            boo/all_proxy_config_nchw.txt \
            boo/prod_conv_config_nchw.txt \
            boo/batch_norm_config.txt \
            boo/skip_gemm_config.txt \
            boo/attention_shapes.txt
          git checkout main
          cd -
      - name: Setup environment variables
        run: |
          echo "MIOPEN_FIND_ENFORCE=3" >> "$GITHUB_ENV"
          echo "PYTHONUNBUFFERED=1" >> "$GITHUB_ENV"
          echo "ROCR_VISIBLE_DEVICES=0" >> "$GITHUB_ENV"
      - name: Run hipblaslt-bench
        if: always() && (github.event_name == 'schedule' || inputs.run_hipblaslt == true)
        run: |
          source "${VENV_DIR}/bin/activate"
          # First entry of the reverse-sorted bucket listing that matches
          # linux-gfx120X (presumably the newest nightly - confirm the listing
          # order if this ever picks a stale build).
          tarFileName=$( aws s3 ls s3://therock-nightly-tarball/ --recursive --no-sign-request | grep '\.tar\.gz$' | grep linux-gfx120X | sort -r | head -1 | awk '{print $NF}' || true )
          # Fail with a clear message instead of downloading the bucket root.
          if [ -z "${tarFileName}" ]; then
            echo "No linux-gfx120X tarball found in s3://therock-nightly-tarball/" >&2
            exit 1
          fi
          echo "Installing $tarFileName"
          wget "https://therock-nightly-tarball.s3.us-east-2.amazonaws.com/${tarFileName}"
          mkdir -p install
          # Extract exactly the file that was just downloaded rather than
          # whatever *.tar.gz happens to be in the directory.
          tar -xf "$(basename "${tarFileName}")" -C install
          export PATH=$PWD/install/bin:$PATH
          echo "============= Running Gemm Shape with hipblaslt =============================="
          bash amd-shark-ai-reports/boo/hipblaslt-bench.sh >& output_artifacts/rdna4_gemm_hipBlaslt-bench.csv
      # The benchmark steps below end in "|| true" on purpose: a failing
      # driver run must not prevent the remaining suites or the log upload.
      - name: Run Prod Conv with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_prod_conv == true)
        run: |
          echo "============ Running Prod Conv shape with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/prod_conv_config.txt --csv output_artifacts/rdna4_prod_conv_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/rdna4_prod_conv_miopen_iree.log || true
      - name: Run All Proxy
        if: always() && (github.event_name == 'schedule' || inputs.run_all_proxy == true)
        run: |
          echo "============ Running All proxy shape with MIOpen and IREE without tuning ==================="
          ## temporarily disable --backend=torch
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/all_proxy_config.txt --csv output_artifacts/rdna4_all_proxy_miopen_iree.csv \
            --backend=iree_boo_experimental --verbose \
            2>&1 | tee output_artifacts/rdna4_all_proxy_miopen_iree.log || true
      - name: Run Gemm with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_gemm == true)
        run: |
          echo "============ Running Gemm shape with MIOpen and IREE without tuning ======================================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/gemm_config.txt --csv output_artifacts/rdna4_gemm_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/rdna4_gemm_miopen_iree.log || true
      - name: Run Batch Norm with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_batch_norm == true)
        run: |
          echo "============ Running Batch norm shape with MIOpen and IREE without tuning ======================================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/batch_norm_config.txt --csv output_artifacts/rdna4_batch_norm_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --backend=inductor --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/rdna4_batch_norm_miopen_iree.log || true
      # The two NCHW variants are currently switched off via "if: false".
      - name: Run Prod Conv NCHW with verify numerics
        if: false
        run: |
          echo "============ Running Prod Conv shape with NCHW layout with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/prod_conv_config_nchw.txt --csv output_artifacts/rdna4_prod_conv_miopen_iree_nchw.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/rdna4_prod_conv_miopen_iree_nchw.log || true
      - name: Run All Proxy NCHW with verify numerics
        if: false
        run: |
          echo "============ Running All proxy shape with NCHW layout with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/all_proxy_config_nchw.txt --csv output_artifacts/rdna4_all_proxy_miopen_iree_nchw.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/rdna4_all_proxy_miopen_iree_nchw.log || true
      - name: Run Attention
        if: always() && (github.event_name == 'schedule' || inputs.run_attention == true)
        run: |
          echo "============ Running attention shapes with Inductor and IREE without tuning ==================="
          pip install -r amdsharktuner/requirements-dev.txt
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/attention_shapes.txt --csv output_artifacts/rdna4_attention_shapes_miopen_iree.csv \
            --backend inductor --backend=iree_boo_experimental \
            2>&1 | tee output_artifacts/rdna4_attention_shapes.log || true
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: rdna-boo-logs
          path: |
            output_artifacts/*.csv
            output_artifacts/*.log
            output_artifacts/version.txt

  # Publishes the uploaded logs (plus comparison reports for manual runs) to
  # the nod-ai/amd-shark-ai-reports repository.
  push_logs:
    name: "Push log for conv run"
    needs: [test_boo_rdna4]
    # Run even when the benchmark job failed so partial results are kept.
    if: always()
    runs-on: ubuntu-24.04
    steps:
      - name: Download log artifacts
        # NOTE(review): referenced by tag; consider pinning to a commit SHA
        # like the other actions in this workflow.
        uses: actions/download-artifact@v7
        with:
          name: rdna-boo-logs
          path: logs
      - name: Checkout Target Repo
        if: always()
        # Same pinned commit as the checkout in the benchmark job.
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
        with:
          repository: nod-ai/amd-shark-ai-reports
          token: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
          path: amd-shark-ai-reports
      - name: Generate Comparison
        if: github.event_name == 'workflow_dispatch'
        run: |
          cd amd-shark-ai-reports
          # Latest nightly directory; the names are YYYY-MM-DD dates, so a
          # plain sort is chronological.
          export LATEST_DATE=$(ls -1 boo/boo-nightly-gfx120X | sort | tail -n 1)
          # All comparison outputs below are written into this directory.
          mkdir -p ../logs/comparison
          if [ "${{ inputs.run_all_proxy }}" = "true" ]; then
            echo "1"
            python github-actions/new_boo_comp.py --current-file "../logs/rdna4_all_proxy_miopen_iree.csv" --baseline-file "boo/boo-nightly-gfx120X/${LATEST_DATE}/rdna4_all_proxy_miopen_iree.csv" --output-file "../logs/comparison/current_vs_${LATEST_DATE}_all_proxy_iree.md" --threshold 0.05 --total-arguments 1071 || echo "failure in all_proxy"
          fi
          if [ "${{ inputs.run_gemm }}" = "true" ]; then
            echo "2"
            python github-actions/new_boo_comp.py --current-file "../logs/rdna4_gemm_miopen_iree.csv" --baseline-file "boo/boo-nightly-gfx120X/${LATEST_DATE}/rdna4_gemm_miopen_iree.csv" --output-file "../logs/comparison/current_vs_${LATEST_DATE}_gemm_iree.md" --threshold 0.05 --total-arguments 316 || echo "failure in gemm"
          fi
          if [ "${{ inputs.run_prod_conv }}" = "true" ]; then
            echo "3"
            python github-actions/new_boo_comp.py --current-file "../logs/rdna4_prod_conv_miopen_iree.csv" --baseline-file "boo/boo-nightly-gfx120X/${LATEST_DATE}/rdna4_prod_conv_miopen_iree.csv" --output-file "../logs/comparison/current_vs_${LATEST_DATE}_prod_conv_iree.md" --threshold 0.05 --total-arguments 589 || echo "failure in prod conv"
          fi
          if [ "${{ inputs.run_batch_norm }}" = "true" ]; then
            echo "4"
            python github-actions/new_boo_comp.py --current-file "../logs/rdna4_batch_norm_miopen_iree.csv" --baseline-file "boo/boo-nightly-gfx120X/${LATEST_DATE}/rdna4_batch_norm_miopen_iree.csv" --output-file "../logs/comparison/current_vs_${LATEST_DATE}_batch_norm_miopen_iree.md" --threshold 0.05 --total-arguments 33 || echo "Failure in batchnorm"
          fi
          # The hipblaslt comparison needs both the hipblaslt and the gemm CSV.
          if [ "${{ inputs.run_hipblaslt }}" = "true" ] && [ "${{ inputs.run_gemm }}" = "true" ]; then
            echo "5"
            python github-actions/new_boo_comp_hipblaslt_iree.py --hipblaslt-file "../logs/rdna4_gemm_hipBlaslt-bench.csv" --iree-file "../logs/rdna4_gemm_miopen_iree.csv" --output-csv "../logs/comparison/gemm_iree_hipblaslt_comparison.csv" || echo "failure in hipblaslt"
          fi
          cd ..
          ls -R logs
      - name: Setup reports
        if: always()
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email ""
          # Nightly results go to a per-day directory, manual runs to a
          # per-minute directory under boo-custom-runs-gfx120X.
          if [ "${{ github.event_name }}" = "schedule" ]; then
            date=$(date -u +'%Y-%m-%d')
            OUTPUT_DIR="amd-shark-ai-reports/boo/boo-nightly-gfx120X/$date/"
          else
            date="$(date -u +'%Y-%m-%d_%H-%M')"
            OUTPUT_DIR="amd-shark-ai-reports/boo/boo-custom-runs-gfx120X/${date}/"
          fi
          mkdir -p "$OUTPUT_DIR"
          # Exported for the "Push artifacts" step.
          echo "OUTPUT_DIR=$OUTPUT_DIR" >> "$GITHUB_ENV"
      - name: Push artifacts
        if: always()
        run: |
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            mkdir -p "$OUTPUT_DIR/comparison/"
            cp -v logs/comparison/*.md "$OUTPUT_DIR/comparison/" || true
            # The hipblaslt-vs-IREE comparison is written as CSV, not Markdown.
            cp -v logs/comparison/*.csv "$OUTPUT_DIR/comparison/" || true
          fi
          # Best effort: a suite that did not run simply has no files to copy.
          cp -v logs/*.csv "$OUTPUT_DIR" || true
          cp -v logs/*.log "$OUTPUT_DIR" || true
          cp -v logs/version.txt "$OUTPUT_DIR" || true
          # Everything has been copied; remove the downloaded artifacts while
          # still in the workspace root so the relative path is correct.
          rm -rf logs
          cd amd-shark-ai-reports/boo
          git pull
          git add .
          # Skip commit/push when nothing was staged instead of failing on
          # "nothing to commit".
          if git diff --cached --quiet; then
            echo "No new report files to commit"
          else
            git commit -m "Add conv report on $(date -u +'%Y-%m-%d')"
            git push origin main
          fi