# CI - Boo on Mi355 #330
# NOTE(review): the page this file was copied from warned that it contains
# hidden or bidirectional Unicode text that may be interpreted differently
# from what is displayed. Open the file in an editor that reveals hidden
# Unicode characters and remove any that are unintended.
# Copyright 2025 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# This CI runs the following:
#
# 1. proxy with untuned boo and native pytorch
# 2. prod with untuned boo and native pytorch
# 3. gemms with untuned boo, native pytorch and hipblaslt
# 4. batch norm with untuned boo, native pytorch and inductor
# 5. prod(nchw) with untuned boo and native pytorch (currently disabled)
# 6. proxy(nchw) with untuned boo and native pytorch (currently disabled)
# 7. Attention shapes with untuned boo and inductor
name: CI - Boo on Mi355

on:
  # Nightly trigger (cron is evaluated in UTC).
  schedule:
    - cron: '45 11 * * *'

  # Manual trigger. The three string inputs choose which IREE build to
  # install; the boolean inputs choose which benchmark suites to run.
  workflow_dispatch:
    inputs:
      # --- IREE build selection -------------------------------------------
      commit_hash:
        description: 'IREE commit hash'
        required: false
        default: ''
      iree_repo_fork:
        description: 'IREE fork'
        required: false
        default: 'iree-org/iree'
      iree_version_tag:
        description: 'IREE release version tag'
        required: false
        default: ''

      # --- Benchmark suite toggles (all off unless ticked) ----------------
      run_prod_conv:
        description: 'Run Prod Conv'
        type: boolean
        default: false
      run_all_proxy:
        description: 'Run All Proxy'
        type: boolean
        default: false
      run_gemm:
        description: 'Run Gemm'
        type: boolean
        default: false
      run_hipblaslt:
        description: 'Run hipblaslt gemm'
        type: boolean
        default: false
      run_batch_norm:
        description: 'Run Batch Norm'
        type: boolean
        default: false
      # The NCHW toggles are kept for reference while those suites are
      # switched off.
      # run_prod_conv_nchw:
      #   description: "Run Prod Conv NCHW"
      #   type: boolean
      #   default: false
      # run_all_proxy_nchw:
      #   description: "Run All Proxy NCHW"
      #   type: boolean
      #   default: false
      run_attention:
        description: 'Run Attention'
        type: boolean
        default: false
# Least privilege for the built-in GITHUB_TOKEN: this workflow only needs to
# read this repository. Every clone of and push to the reports repository
# authenticates with the AMD_SHARK_AI_GITHUB_TOKEN secret instead.
permissions:
  contents: read
concurrency:
  # A newer run replaces an in-flight run that has the same group key.
  cancel-in-progress: true
  # The event name is part of the key, so scheduled and manually dispatched
  # runs land in different groups and never cancel one another. Scheduled
  # jobs have higher priority and must not be interrupted by manual
  # workflow_dispatch runs.
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.sha }}
jobs:
  # Runs the BOO benchmark suites on an MI355 runner and uploads the resulting
  # CSV/log files (plus the installed package list) as the `boo-logs` artifact.
  # A scheduled run executes every enabled suite; a manual run executes only
  # the suites whose input is ticked.
  test_boo:
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
    timeout-minutes: 600
    name: "Boo on Mi355"
    strategy:
      matrix:
        # Quoted so the version stays a string; an unquoted value such as
        # 3.10 would be parsed as the float 3.1.
        version: ["3.12"]
      fail-fast: false
    runs-on: linux-mi355-1gpu-ossci-nod-ai
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      # User-supplied inputs are exposed as environment variables so that the
      # scripts below never have workflow expressions expanded into them.
      IREE_COMMIT_HASH: ${{ inputs.commit_hash }}
      IREE_REMOTE_REPO: ${{ inputs.iree_repo_fork }}
      IREE_VERSION_TAG: ${{ inputs.iree_version_tag }}
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1

      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548  # v6.1.0
        with:
          python-version: ${{ matrix.version }}

      - name: Print System Information
        run: |
          echo "============ CPU Information ============"
          lscpu
          echo ""
          echo "============ GPU Information ============"
          rocm-smi || true
          rocminfo || true
          echo ""
          echo "============ ROCM Version ============"
          cat /opt/rocm/.info/version || echo "ROCM version file not found"

      - name: Create Python venv
        run: |
          python -m venv "${VENV_DIR}"
          source "${VENV_DIR}/bin/activate"
          # Only create the system-wide `python` link when it is missing, so
          # the step does not fail if a previous run on the same machine
          # already created it.
          if [ ! -e /usr/bin/python ] && [ ! -L /usr/bin/python ]; then
            sudo ln -s python3 /usr/bin/python
          fi
          # Later steps pick up the venv through PATH.
          echo "${VENV_DIR}/bin" >> "$GITHUB_PATH"

      - name: Setup Environment
        run: |
          mkdir -p output_artifacts
          echo "OUTPUT_ARTIFACTS=$PWD/output_artifacts" >> "$GITHUB_ENV"
          python -m pip install \
            --index-url https://rocm.nightlies.amd.com/v2/gfx950-dcgpu/ \
            -r .github/workflows/ci_boo_torch_pin.txt

      # Exactly which IREE install step runs depends on the dispatch inputs;
      # with neither a commit hash nor a version tag (always the case for a
      # scheduled run) the nightly packages are used.
      - name: Install IREE Nightly Packages
        if: ${{ !inputs.commit_hash && !inputs.iree_version_tag }}
        run: |
          pip install --no-index --find-links https://iree.dev/pip-release-links.html --pre --no-deps iree-base-compiler iree-base-runtime

      - name: Install IREE from version tag
        if: ${{ inputs.iree_version_tag != '' }}
        run: |
          pip install \
            --find-links https://iree.dev/pip-release-links.html \
            --upgrade --pre \
            "iree-base-compiler==${IREE_VERSION_TAG}" \
            "iree-base-runtime==${IREE_VERSION_TAG}"

      - name: Install IREE from source
        if: ${{ inputs.commit_hash != '' }}
        run: |
          git clone https://github.com/iree-org/iree.git && cd iree
          # The requested commit may only exist on the fork, so fetch it too.
          git remote add fork_user "https://github.com/${IREE_REMOTE_REPO}"
          git fetch fork_user
          git checkout "${IREE_COMMIT_HASH}"
          git submodule update --init
          export IREE_HAL_DRIVER_HIP=ON
          export IREE_TARGET_BACKEND_ROCM=ON
          export IREE_ROCM_TEST_TARGET_CHIP=""
          pip install -v runtime/ compiler/
          # Recorded so "Print packages" can show the source revision.
          echo "IREE_SOURCE_HASH=$(git -C . rev-parse --short HEAD)" >> "$GITHUB_ENV"

      - name: Install iree-turbine
        run: pip install "git+https://github.com/iree-org/iree-turbine.git"

      - name: Print packages
        run: |
          pip freeze > "$OUTPUT_ARTIFACTS/version.txt"
          # For source builds, replace the local file:// paths reported by
          # pip with the short commit hash of the IREE checkout.
          if [ -n "$IREE_SOURCE_HASH" ]; then
            sed -i \
              -e "s|iree-base-compiler @ file://[^ ]*|iree-base-compiler $IREE_SOURCE_HASH|g" \
              -e "s|iree-base-runtime @ file://[^ ]*|iree-base-runtime $IREE_SOURCE_HASH|g" \
              "$OUTPUT_ARTIFACTS/version.txt"
          fi
          cat "$OUTPUT_ARTIFACTS/version.txt"

      - name: Download config files
        env:
          # Passed through the environment rather than expanded into the
          # script text.
          REPORTS_TOKEN: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
        run: |
          echo "Getting config files"
          git clone --filter=blob:none --no-checkout \
            "https://x-access-token:${REPORTS_TOKEN}@github.com/nod-ai/amd-shark-ai-reports.git"
          cd amd-shark-ai-reports
          git sparse-checkout init --cone
          # boo/attention_shapes.txt is read by the "Run Attention" step, so
          # it is listed explicitly alongside the other inputs.
          git sparse-checkout set \
            boo/all_proxy_config.txt \
            boo/prod_conv_config.txt \
            boo/gemm_config.txt \
            boo/hipblaslt-bench.sh \
            boo/all_proxy_config_nchw.txt \
            boo/prod_conv_config_nchw.txt \
            boo/batch_norm_config.txt \
            boo/skip_gemm_config.txt \
            boo/attention_shapes.txt
          git checkout main
          cd -

      - name: Setup environment variables
        run: |
          echo "MIOPEN_FIND_ENFORCE=3" >> "$GITHUB_ENV"
          echo "PYTHONUNBUFFERED=1" >> "$GITHUB_ENV"
          echo "ROCR_VISIBLE_DEVICES=0" >> "$GITHUB_ENV"

      # Benchmark steps.
      # `!cancelled()` lets each suite run even if an earlier step failed,
      # while still stopping when the run is cancelled (for example by
      # `cancel-in-progress`); `always()` would keep them running after a
      # cancellation. The trailing `|| true` is deliberate: a failing suite
      # must not prevent the remaining suites or the log upload.
      - name: Run Prod Conv with verify numerics
        if: ${{ !cancelled() && (github.event_name == 'schedule' || inputs.run_prod_conv == true) }}
        run: |
          echo "============ Running Prod Conv shape with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/prod_conv_config.txt --csv output_artifacts/prod_conv_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/prod_conv_miopen_iree.log || true

      - name: Run All Proxy with verify numerics
        if: ${{ !cancelled() && (github.event_name == 'schedule' || inputs.run_all_proxy == true) }}
        run: |
          echo "============ Running All proxy shape with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/all_proxy_config.txt --csv output_artifacts/all_proxy_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/all_proxy_miopen_iree.log || true

      - name: Run Gemm with verify numerics
        if: ${{ !cancelled() && (github.event_name == 'schedule' || inputs.run_gemm == true) }}
        run: |
          echo "============ Running Gemm shape with MIOpen and IREE without tuning ======================================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/gemm_config.txt --csv output_artifacts/gemm_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/gemm_miopen_iree.log || true

      - name: Run Batch Norm with verify numerics
        if: ${{ !cancelled() && (github.event_name == 'schedule' || inputs.run_batch_norm == true) }}
        run: |
          echo "============ Running Batch norm shape with MIOpen and IREE without tuning ======================================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/batch_norm_config.txt --csv output_artifacts/batch_norm_miopen_iree.csv \
            --backend=torch --backend=iree_boo_experimental --backend=inductor --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/batch_norm_miopen_iree.log || true

      # The two NCHW suites are currently switched off.
      - name: Run Prod Conv NCHW with verify numerics
        if: false
        run: |
          echo "============ Running Prod Conv shape with NCHW layout with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/prod_conv_config_nchw.txt --csv output_artifacts/prod_conv_miopen_iree_nchw.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/prod_conv_miopen_iree_nchw.log || true

      - name: Run All Proxy NCHW with verify numerics
        if: false
        run: |
          echo "============ Running All proxy shape with NCHW layout with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/all_proxy_config_nchw.txt --csv output_artifacts/all_proxy_miopen_iree_nchw.csv \
            --backend=torch --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 --verbose --numerics-verbose \
            2>&1 | tee output_artifacts/all_proxy_miopen_iree_nchw.log || true

      - name: Run Attention
        if: ${{ !cancelled() && (github.event_name == 'schedule' || inputs.run_attention == true) }}
        run: |
          echo "============ Running attention shapes with Inductor and IREE without tuning ==================="
          pip install -r amdsharktuner/requirements-dev.txt
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/attention_shapes.txt --csv output_artifacts/attention_shapes_miopen_iree.csv \
            --backend inductor --backend=iree_boo_experimental \
            2>&1 | tee output_artifacts/attention_shapes.log || true

      - name: Run hipblaslt-bench
        if: ${{ !cancelled() && (github.event_name == 'schedule' || inputs.run_hipblaslt == true) }}
        run: |
          # Install the AWS CLI; it is used to find the newest nightly tarball.
          curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o awscliv2.zip
          sudo apt-get install -y unzip
          # -o / --update keep these two commands from failing when files or
          # an AWS CLI installation are already present on the machine.
          unzip -o awscliv2.zip
          sudo ./aws/install --update
          # Reverse-sorting the listing puts the most recent gfx950 Linux
          # tarball first; the last column of that line is its object key.
          tarFileName=$(aws s3 ls s3://therock-nightly-tarball/ --recursive --no-sign-request | grep '\.tar\.gz$' | grep linux-gfx950 | sort -r | head -1 | awk '{print $NF}')
          echo "Installing $tarFileName"
          wget "https://therock-nightly-tarball.s3.us-east-2.amazonaws.com/${tarFileName}"
          mkdir -p install
          # Extract exactly the file that was just downloaded.
          tar -xf "$(basename "${tarFileName}")" -C install
          export PATH="$PWD/install/bin:$PATH"
          echo "============= Running Gemm Shape with hipblaslt =============================="
          # stdout and stderr are both captured in the CSV, as before.
          bash amd-shark-ai-reports/boo/hipblaslt-bench.sh > output_artifacts/gemm_hipBlaslt-bench.csv 2>&1

      # Always upload whatever was produced, including after failures.
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: boo-logs
          path: |
            output_artifacts/*.csv
            output_artifacts/*.log
            output_artifacts/version.txt

  # Publishes the `boo-logs` artifact to the nod-ai/amd-shark-ai-reports
  # repository: scheduled runs go to boo/boo-nightly/<date>/, manual runs to
  # boo/boo-custom-runs/<timestamp>[_<commit>]/ together with comparison
  # reports against the latest nightly.
  push_logs:
    name: "Push log for conv run"
    needs: [test_boo]
    if: always()
    runs-on: ubuntu-24.04
    steps:
      # NOTE(review): the other actions in this workflow are pinned to a
      # commit SHA; consider pinning this one as well.
      - name: Download log artifacts
        uses: actions/download-artifact@v7
        with:
          name: boo-logs
          path: logs

      - name: Checkout Target Repo
        if: always()
        # Pinned to the same commit that the test_boo job uses.
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1
        with:
          repository: nod-ai/amd-shark-ai-reports
          token: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
          path: amd-shark-ai-reports

      # Manual runs only: compare each suite that was run against the most
      # recent nightly results. A failed comparison is reported but does not
      # fail the step.
      - name: Generate Comparison
        if: github.event_name == 'workflow_dispatch'
        run: |
          cd amd-shark-ai-reports
          # Nightly directories are named by date, so the last one in sorted
          # order is the newest.
          export LATEST_DATE=$(ls -1 boo/boo-nightly | sort | tail -n 1)
          # Make sure the output directory exists in case the comparison
          # scripts do not create it themselves.
          mkdir -p ../logs/comparison
          if [ "${{ inputs.run_all_proxy }}" = "true" ]; then
            echo "1"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/all_proxy_miopen_iree.csv" \
              --baseline-file "boo/boo-nightly/${LATEST_DATE}/all_proxy_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_all_proxy_iree.md" \
              --threshold 0.05 --total-arguments 1071 || echo "failure in all_proxy"
          fi
          if [ "${{ inputs.run_gemm }}" = "true" ]; then
            echo "2"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/gemm_miopen_iree.csv" \
              --baseline-file "boo/boo-nightly/${LATEST_DATE}/gemm_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_gemm_iree.md" \
              --threshold 0.05 --total-arguments 316 || echo "failure in gemm"
          fi
          if [ "${{ inputs.run_prod_conv }}" = "true" ]; then
            echo "3"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/prod_conv_miopen_iree.csv" \
              --baseline-file "boo/boo-nightly/${LATEST_DATE}/prod_conv_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_prod_conv_iree.md" \
              --threshold 0.05 --total-arguments 589 || echo "failure in prod conv"
          fi
          if [ "${{ inputs.run_batch_norm }}" = "true" ]; then
            echo "4"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/batch_norm_miopen_iree.csv" \
              --baseline-file "boo/boo-nightly/${LATEST_DATE}/batch_norm_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_batch_norm_miopen_iree.md" \
              --threshold 0.05 --total-arguments 33 || echo "Failure in batchnorm"
          fi
          # The hipblaslt comparison needs both the hipblaslt and the IREE
          # gemm results.
          if [ "${{ inputs.run_hipblaslt }}" = "true" ] && [ "${{ inputs.run_gemm }}" = "true" ]; then
            echo "5"
            python github-actions/new_boo_comp_hipblaslt_iree.py \
              --hipblaslt-file "../logs/gemm_hipBlaslt-bench.csv" \
              --iree-file "../logs/gemm_miopen_iree.csv" \
              --output-csv "../logs/comparison/gemm_iree_hipblaslt_comparison.csv" || echo "failure in hipblaslt"
          fi
          cd ..
          ls -R logs

      # Picks the destination directory and exports it as OUTPUT_DIR for the
      # next step.
      - name: Setup reports
        if: always()
        env:
          # Free-form user input: read it from the environment instead of
          # expanding it into the script text.
          COMMIT_HASH: ${{ inputs.commit_hash }}
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email ""
          if [ "${{ github.event_name }}" = "schedule" ]; then
            run_label="$(date -u +'%Y-%m-%d')"
            OUTPUT_DIR="amd-shark-ai-reports/boo/boo-nightly/${run_label}/"
          else
            run_label="$(date -u +'%Y-%m-%d_%H-%M')"
            if [ -n "${COMMIT_HASH}" ]; then
              run_label="${run_label}_${COMMIT_HASH}"
            fi
            OUTPUT_DIR="amd-shark-ai-reports/boo/boo-custom-runs/${run_label}/"
          fi
          mkdir -p "${OUTPUT_DIR}"
          echo "OUTPUT_DIR=${OUTPUT_DIR}" >> "$GITHUB_ENV"

      - name: Push artifacts
        if: always()
        run: |
          # Each copy is best-effort: a suite that was not run has no files.
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            mkdir -p "${OUTPUT_DIR}/comparison/"
            cp -v logs/comparison/*.md "${OUTPUT_DIR}/comparison/" || true
            # The hipblaslt comparison is written as a CSV, not Markdown.
            cp -v logs/comparison/*.csv "${OUTPUT_DIR}/comparison/" || true
          fi
          cp -v logs/*.csv "${OUTPUT_DIR}" || true
          cp -v logs/*.log "${OUTPUT_DIR}" || true
          cp -v logs/version.txt "${OUTPUT_DIR}" || true
          cd amd-shark-ai-reports/boo
          git pull
          git add .
          git commit -m "Add conv report on $(date -u +'%Y-%m-%d')"
          git push origin main
          # Absolute path: the working directory is now inside the reports
          # checkout, so a relative ../logs would not reach the artifacts.
          rm -rf "${GITHUB_WORKSPACE}/logs"