Skip to content

CI - Boo on RDNA3

CI - Boo on RDNA3 #3

# Copyright 2025 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
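# This CI runs the following (every suite on the nightly schedule; individually
# selectable through the workflow_dispatch inputs):
#
# 1. proxy with untuned boo and native pytorch
# 2. prod with untuned boo and native pytorch
# 3. gemms with untuned boo and native pytorch
# 4. attention shapes with untuned boo and inductor
# 5. batch norm shapes with untuned boo
# 6. proxy and prod conv shapes in NCHW layout with untuned boo
# 7. gemm shapes with hipblaslt-bench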
name: CI - Boo on RDNA3

# Triggers:
#   * workflow_dispatch - manual run; every benchmark suite is opt-in through
#     its own boolean input (all default to false).
#   * schedule          - nightly run; the step-level `if:` conditions in the
#     test job run every suite when the event is `schedule`.
on:
  workflow_dispatch:
    inputs:
      runner:
        description: 'Runner to use for the job'
        required: false
        default: 'rdna3-ci'
        type: choice
        options:
          - rdna3-ci
      run_prod_conv:
        description: "Run Prod Conv"
        type: boolean
        default: false
      run_all_proxy:
        description: "Run All Proxy"
        type: boolean
        default: false
      run_gemm:
        description: "Run Gemm"
        type: boolean
        default: false
      run_prod_conv_nchw:
        description: "Run prod conv nchw"
        type: boolean
        default: false
      run_all_proxy_nchw:
        description: "Run all proxy nchw"
        type: boolean
        default: false
      run_hipblaslt:
        description: "Run hipblaslt gemm"
        type: boolean
        default: false
      run_batch_norm:
        description: "Run Batch Norm"
        type: boolean
        default: false
      run_attention:
        description: "Run Attention"
        type: boolean
        default: false
  schedule:
    # Daily at 06:36 (GitHub evaluates cron expressions in UTC).
    - cron: "36 6 * * *"

permissions:
  contents: write

concurrency:
  # Separate concurrency groups for scheduled vs manual runs to prevent them
  # from cancelling each other. Scheduled jobs have higher priority and should
  # not be interrupted by manual workflow_dispatch runs.
  # The event name is part of the group key, which is what keeps the two kinds
  # of run apart.
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
  # Runs the selected BOO benchmark suites on the RDNA3 runner and uploads the
  # resulting CSV / log files as the `rdna-boo-logs` artifact.
  # Every benchmark step uses `always()` so one failing suite does not prevent
  # the remaining suites from running, and each driver invocation ends in
  # `|| true` so the step itself is best-effort.
  test_boo_rdna3:
    # Scheduled runs only execute in the nod-ai organisation; manual runs are
    # allowed anywhere.
    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
    timeout-minutes: 645
    name: "Boo on RDNA 3"
    strategy:
      matrix:
        # Quoted so the version stays a string; an unquoted value such as 3.10
        # would be loaded as the float 3.1.
        version: ["3.12"]
      fail-fast: false
    runs-on: ${{ github.event.inputs.runner || 'rdna3-ci' }}
    defaults:
      run:
        shell: bash
    env:
      VENV_DIR: ${{ github.workspace }}/.venv
      OFFLINE_SERVING: DISABLED
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      - name: "Setting up Python"
        id: setup_python
        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
        with:
          python-version: ${{matrix.version}}
      # - name: Print System Information
      #   run: |
      #     echo "============ CPU Information ============"
      #     lscpu
      #     echo ""
      #     echo "============ GPU Information ============"
      #     rocm-smi || true
      #     rocminfo || true
      #     echo ""
      #     echo "============ ROCM Version ============"
      #     cat /opt/rocm/.info/version || echo "ROCM version file not found"
      - name: Create Python venv
        run: |
          python -m venv ${VENV_DIR}
          source ${VENV_DIR}/bin/activate
          # Put the venv first on PATH for all following steps.
          echo "${VENV_DIR}/bin" >> $GITHUB_PATH
      - name: Install pip deps
        run: |
          mkdir -p output_artifacts
          python -m pip install \
            --index-url https://rocm.nightlies.amd.com/v2/gfx110X-all \
            -r .github/workflows/ci_boo_torch_pin.txt
          pip install --no-index --find-links https://iree.dev/pip-release-links.html --pre --no-deps iree-base-compiler iree-base-runtime
          pip install "git+https://github.com/iree-org/iree-turbine.git"
          # Record the exact package set so it is published with the results.
          pip freeze > $(pwd)/output_artifacts/version.txt
      - name: Download config files
        run: |
          echo "Getting config files"
          git clone --filter=blob:none --no-checkout https://x-access-token:${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}@github.com/nod-ai/amd-shark-ai-reports.git
          cd amd-shark-ai-reports
          git sparse-checkout init --cone
          # List every config a later step reads; attention_shapes.txt is
          # consumed by the "Run Attention" step.
          git sparse-checkout set \
            boo/all_proxy_config.txt \
            boo/prod_conv_config.txt \
            boo/gemm_config.txt \
            boo/hipblaslt-bench.sh \
            boo/all_proxy_config_nchw.txt \
            boo/prod_conv_config_nchw.txt \
            boo/batch_norm_config.txt \
            boo/skip_gemm_config.txt \
            boo/attention_shapes.txt
          git checkout main
          cd -
      - name: Setup environment variables
        run: |
          echo "MIOPEN_FIND_ENFORCE=3" >> $GITHUB_ENV
          echo "PYTHONUNBUFFERED=1" >> $GITHUB_ENV
          echo "ROCR_VISIBLE_DEVICES=0" >> $GITHUB_ENV
      - name: Run hipblaslt-bench
        if: always() && (github.event_name == 'schedule' || inputs.run_hipblaslt == true)
        run: |
          source ${VENV_DIR}/bin/activate
          # Pick the first linux-gfx110X tarball in reverse-sorted listing order.
          tarFileName=$( aws s3 ls s3://therock-nightly-tarball/ --recursive --no-sign-request | grep '\.tar\.gz$' | grep linux-gfx110X | sort -r | head -1 | awk '{print $NF}' || true )
          # Fail early with a clear message instead of downloading a bogus URL.
          if [ -z "${tarFileName}" ]; then
            echo "No linux-gfx110X tarball found in s3://therock-nightly-tarball/" >&2
            exit 1
          fi
          echo "Installing $tarFileName"
          wget "https://therock-nightly-tarball.s3.us-east-2.amazonaws.com/${tarFileName}"
          mkdir -p install
          # Extract exactly the file that was downloaded rather than a glob.
          tar -xf "$(basename "${tarFileName}")" -C install
          export PATH=$PWD/install/bin:$PATH
          echo "============= Running Gemm Shape with hipblaslt =============================="
          bash amd-shark-ai-reports/boo/hipblaslt-bench.sh >& output_artifacts/rdna3_gemm_hipBlaslt-bench.csv
      - name: Run Prod Conv with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_prod_conv == true)
        run: |
          echo "============ Running Prod Conv shape with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/prod_conv_config.txt --csv output_artifacts/rdna3_prod_conv_miopen_iree.csv \
            --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 \
            2>&1 | tee output_artifacts/rdna3_prod_conv_miopen_iree.log || true
      - name: Run All Proxy
        if: always() && (github.event_name == 'schedule' || inputs.run_all_proxy == true)
        run: |
          echo "============ Running All proxy shape with MIOpen and IREE without tuning ==================="
          ## temporarily disable --backend=torch
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/all_proxy_config.txt --csv output_artifacts/rdna3_all_proxy_miopen_iree.csv \
            --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 \
            2>&1 | tee output_artifacts/rdna3_all_proxy_miopen_iree.log || true
      # NOTE(review): the step name says "with verify numerics" but the command
      # below does not pass --verify-numerics - confirm which one is intended.
      - name: Run Gemm with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_gemm == true)
        run: |
          echo "============ Running Gemm shape with MIOpen and IREE without tuning ======================================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/gemm_config.txt --csv output_artifacts/rdna3_gemm_miopen_iree.csv \
            --backend=iree_boo_experimental \
            2>&1 | tee output_artifacts/rdna3_gemm_miopen_iree.log || true
      - name: Run Batch Norm with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_batch_norm == true)
        run: |
          echo "============ Running Batch norm shape with MIOpen and IREE without tuning ======================================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/batch_norm_config.txt --csv output_artifacts/rdna3_batch_norm_miopen_iree.csv \
            --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 \
            2>&1 | tee output_artifacts/rdna3_batch_norm_miopen_iree.log || true
      - name: Run Prod Conv NCHW with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_prod_conv_nchw == true)
        run: |
          echo "============ Running Prod Conv shape with NCHW layout with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/prod_conv_config_nchw.txt --csv output_artifacts/rdna3_prod_conv_miopen_iree_nchw.csv \
            --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 \
            2>&1 | tee output_artifacts/rdna3_prod_conv_miopen_iree_nchw.log || true
      - name: Run All Proxy NCHW with verify numerics
        if: always() && (github.event_name == 'schedule' || inputs.run_all_proxy_nchw == true)
        run: |
          echo "============ Running All proxy shape with NCHW layout with MIOpen and IREE without tuning ==================="
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/all_proxy_config_nchw.txt --csv output_artifacts/rdna3_all_proxy_miopen_iree_nchw.csv \
            --backend=iree_boo_experimental --verify-numerics --numerics-reference-dtype=float32 --numerics-min-samples 100 \
            2>&1 | tee output_artifacts/rdna3_all_proxy_miopen_iree_nchw.log || true
      - name: Run Attention
        if: always() && (github.event_name == 'schedule' || inputs.run_attention == true)
        run: |
          echo "============ Running attention shapes with Inductor and IREE without tuning ==================="
          pip install -r amdsharktuner/requirements-dev.txt
          iree-boo-driver \
            --commands-file amd-shark-ai-reports/boo/attention_shapes.txt --csv output_artifacts/rdna3_attention_shapes_miopen_iree.csv \
            --backend=iree_boo_experimental \
            2>&1 | tee output_artifacts/rdna3_attention_shapes.log || true
      # The artifact name must match the one downloaded by the push_logs job.
      - name: Upload log files
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: rdna-boo-logs
          path: |
            output_artifacts/*.csv
            output_artifacts/*.log
            output_artifacts/version.txt
# Job that publishes the benchmark artifacts to the amd-shark-ai-reports repository.
  # Scheduled runs are stored under boo/boo-nightly-gfx110X/<date>/; manual
  # runs under boo/boo-custom-runs-gfx110X/<date_time>/ together with a
  # comparison against the most recent nightly baseline.
  push_logs:
    name: "Push log for conv run"
    needs: [test_boo_rdna3]
    # Run even when the benchmark job failed so partial results are published.
    if: always()
    runs-on: ubuntu-24.04
    steps:
      - name: Download log artifacts
        # NOTE(review): referenced by tag while the actions in the test job
        # are pinned to commit SHAs - consider pinning this one as well.
        uses: actions/download-artifact@v7
        with:
          name: rdna-boo-logs
          path: logs
      - name: Checkout Target Repo
        if: always()
        # Same pinned commit as the checkout in the test job.
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          repository: nod-ai/amd-shark-ai-reports
          token: ${{ secrets.AMD_SHARK_AI_GITHUB_TOKEN }}
          path: amd-shark-ai-reports
      - name: Generate Comparison
        if: github.event_name == 'workflow_dispatch'
        run: |
          cd amd-shark-ai-reports
          # Must be spelled exactly like the directory the scheduled run writes
          # to in the "Setup reports" step (paths are case-sensitive on Linux).
          BASELINE_ROOT="boo/boo-nightly-gfx110X"
          # Directory names are dates, so the last one in sort order is the newest.
          export LATEST_DATE=$(ls -1 "${BASELINE_ROOT}" | sort | tail -n 1)
          # The comparison scripts write here; make sure the directory exists.
          mkdir -p ../logs/comparison
          if [ "${{ inputs.run_all_proxy }}" = "true" ]; then
            echo "1"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/rdna3_all_proxy_miopen_iree.csv" \
              --baseline-file "${BASELINE_ROOT}/${LATEST_DATE}/rdna3_all_proxy_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_all_proxy_iree.md" \
              --threshold 0.05 --total-arguments 1071 || echo "failure in all_proxy"
          fi
          if [ "${{ inputs.run_gemm }}" = "true" ]; then
            echo "2"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/rdna3_gemm_miopen_iree.csv" \
              --baseline-file "${BASELINE_ROOT}/${LATEST_DATE}/rdna3_gemm_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_gemm_iree.md" \
              --threshold 0.05 --total-arguments 316 || echo "failure in gemm"
          fi
          if [ "${{ inputs.run_prod_conv }}" = "true" ]; then
            echo "3"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/rdna3_prod_conv_miopen_iree.csv" \
              --baseline-file "${BASELINE_ROOT}/${LATEST_DATE}/rdna3_prod_conv_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_prod_conv_iree.md" \
              --threshold 0.05 --total-arguments 589 || echo "failure in prod conv"
          fi
          if [ "${{ inputs.run_batch_norm }}" = "true" ]; then
            echo "4"
            python github-actions/new_boo_comp.py \
              --current-file "../logs/rdna3_batch_norm_miopen_iree.csv" \
              --baseline-file "${BASELINE_ROOT}/${LATEST_DATE}/rdna3_batch_norm_miopen_iree.csv" \
              --output-file "../logs/comparison/current_vs_${LATEST_DATE}_batch_norm_miopen_iree.md" \
              --threshold 0.05 --total-arguments 33 || echo "Failure in batchnorm"
          fi
          # The hipblaslt comparison needs both the hipblaslt and the IREE gemm results.
          if [ "${{ inputs.run_hipblaslt }}" = "true" ] && [ "${{ inputs.run_gemm }}" = "true" ]; then
            echo "5"
            python github-actions/new_boo_comp_hipblaslt_iree.py \
              --hipblaslt-file "../logs/rdna3_gemm_hipBlaslt-bench.csv" \
              --iree-file "../logs/rdna3_gemm_miopen_iree.csv" \
              --output-csv "../logs/comparison/gemm_iree_hipblaslt_comparison.csv" || echo "failure in hipblaslt"
          fi
          cd ..
          ls -R logs
      - name: Setup reports
        if: always()
        run: |
          git config --global user.name "GitHub Actions Bot"
          git config --global user.email ""
          # Nightly results are keyed by date; manual runs also carry the time
          # so several runs on the same day do not overwrite each other.
          if [ "${{ github.event_name }}" = "schedule" ]; then
            date=$(date -u +'%Y-%m-%d')
            OUTPUT_DIR="amd-shark-ai-reports/boo/boo-nightly-gfx110X/$date/"
          else
            date="$(date -u +'%Y-%m-%d_%H-%M')"
            OUTPUT_DIR="amd-shark-ai-reports/boo/boo-custom-runs-gfx110X/${date}/"
          fi
          mkdir -p "$OUTPUT_DIR"
          # Hand the destination over to the "Push artifacts" step.
          echo "OUTPUT_DIR=$OUTPUT_DIR" >> "$GITHUB_ENV"
      - name: Push artifacts
        if: always()
        run: |
          # Each copy is best-effort: a suite that did not run leaves no file.
          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
            mkdir -p "$OUTPUT_DIR/comparison/"
            cp -v logs/comparison/*.md "$OUTPUT_DIR/comparison/" || true
            # The hipblaslt-vs-IREE comparison is written as CSV, not Markdown.
            cp -v logs/comparison/*.csv "$OUTPUT_DIR/comparison/" || true
          fi
          cp -v logs/*.csv "$OUTPUT_DIR" || true
          cp -v logs/*.log "$OUTPUT_DIR" || true
          cp -v logs/version.txt "$OUTPUT_DIR" || true
          cd amd-shark-ai-reports/boo
          git pull
          git add .
          # `git commit` exits non-zero when nothing is staged; skip instead.
          if git diff --cached --quiet; then
            echo "No new report files to commit"
          else
            git commit -m "Add conv report on $(date -u +'%Y-%m-%d')"
            git push origin main
          fi
          # The downloaded logs live in the workspace root, two levels up from here.
          rm -rf ../../logs