Skip to content

[hipSPARSELt] Disable GRVWM expansion when running TensileClient #3011

[hipSPARSELt] Disable GRVWM expansion when running TensileClient

[hipSPARSELt] Disable GRVWM expansion when running TensileClient #3011

# Workflow header: triggers, default token permissions and run concurrency
# for the hipBLASLt AddressSanitizer (ASAN) pipeline.
name: hipBLASLt ASAN CI

on:
  # Runs for pull requests that touch hipBLASLt sources, its TheRock test
  # driver, the LSAN suppression list, or this workflow file. `labeled` is
  # included so that adding a label re-triggers the workflow.
  pull_request:
    types: [opened, synchronize, reopened, labeled]
    paths:
      - 'projects/hipblaslt/**'
      - 'test/therock/test_hipblaslt.py'
      - 'test/therock/lsan.supp'
      - '.github/workflows/hipblaslt-asan-ci.yml'
  # Manual runs from the Actions UI / API.
  workflow_dispatch:

# Default GITHUB_TOKEN scope for every job that does not override it.
permissions:
  contents: read

# One active run per pull request (keyed by event number), or per commit SHA
# when the event carries no number; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true
jobs:
# Resolves the shared CI settings (TheRock ref, build image, build runner)
# and computes the build/test matrices that the later jobs read through
# `needs.setup.outputs.*`.
setup:
  name: Setup
  runs-on: ubuntu-24.04
  outputs:
    therock_ref: ${{ steps.ci-env.outputs.therock-ref }}
    docker_image: ${{ steps.ci-env.outputs.docker-image }}
    build_runs_on: ${{ steps.ci-env.outputs.linux-runner }}
    build_matrix: ${{ steps.matrix.outputs.build_matrix }}
    test_matrix: ${{ steps.matrix.outputs.test_matrix }}
  steps:
    # Only the .github tree is needed here (for the local ci-env action).
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        sparse-checkout: .github
        sparse-checkout-cone-mode: true
        fetch-depth: 1

    - name: Load CI environment
      id: ci-env
      uses: ./.github/actions/ci-env

    # gfx90a is always built and tested. gfx942 is added only for manual
    # dispatches or when the pull request carries the `ci:asan` label.
    - name: Compute arch matrix
      id: matrix
      env:
        IS_DISPATCH: ${{ github.event_name == 'workflow_dispatch' }}
        HAS_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'ci:asan') }}
      run: |
        if [[ "$IS_DISPATCH" == "true" || "$HAS_LABEL" == "true" ]]; then
          build_entries='[{"family":"gfx90a"},{"family":"gfx942"}]'
          test_entries='[{"family":"gfx90a","runs_on":"linux-gfx90a-gpu-rocm"},{"family":"gfx942","runs_on":"linux-gfx942-1gpu-core42-ossci-rocm"}]'
        else
          build_entries='[{"family":"gfx90a"}]'
          test_entries='[{"family":"gfx90a","runs_on":"linux-gfx90a-gpu-rocm"}]'
        fi
        {
          echo "build_matrix={\"include\":${build_entries}}"
          echo "test_matrix={\"include\":${test_entries}}"
        } >> "$GITHUB_OUTPUT"
# Builds hipBLASLt with HOST_ASAN through TheRock for every family in the
# build matrix, then packages the resulting stage/dist trees as an artifact
# named asan-trees-hipblaslt-<family> for the test job.
build:
  name: "Build (hipBLASLt | ${{ matrix.family }} | HOST_ASAN)"
  needs: setup
  runs-on: ${{ needs.setup.outputs.build_runs_on }}
  timeout-minutes: 600
  # gfx942 entries are allowed to fail without failing the job.
  continue-on-error: ${{ matrix.family == 'gfx942' }}
  # A job-level `permissions` block replaces the workflow-level one, and every
  # scope not listed here becomes `none`. Restate `contents: read` so the
  # checkout steps keep the access the workflow header declares.
  permissions:
    contents: read
    id-token: write
  strategy:
    fail-fast: false
    matrix: ${{ fromJSON(needs.setup.outputs.build_matrix) }}
  container:
    image: ${{ needs.setup.outputs.docker_image }}
    # Exposes the runner's config directory (holds the AWS credentials file
    # referenced by AWS_SHARED_CREDENTIALS_FILE below).
    options: -v /runner/config:/home/awsconfig/
  env:
    CACHE_DIR: ${{ github.workspace }}/.container-cache
    CCACHE_CONFIGPATH: ${{ github.workspace }}/.ccache/ccache.conf
    AMDGPU_FAMILIES: ${{ matrix.family }}
    TEATIME_FORCE_INTERACTIVE: 0
    AWS_SHARED_CREDENTIALS_FILE: /home/awsconfig/credentials.ini
    ARTIFACT_GROUP: ${{ matrix.family }}-asan
  steps:
    - name: "Check out rocm-libraries"
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Checkout TheRock repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        repository: "ROCm/TheRock"
        path: TheRock
        ref: ${{ needs.setup.outputs.therock_ref }}

    - name: Install python deps
      run: |
        pip install -r TheRock/requirements.txt

    - name: Adjust git config
      run: |
        # Quoted so a workspace path containing spaces stays one argument.
        git config --global --add safe.directory "$PWD"
        git config fetch.parallel 10

    - name: Setup ccache
      run: |
        ./TheRock/build_tools/setup_ccache.py \
          --config-preset "github-oss-dev" \
          --dir "$(dirname "$CCACHE_CONFIGPATH")" \
          --local-path "$CACHE_DIR/ccache"

    - name: Runner health status
      run: |
        ./TheRock/build_tools/health_status.py

    - name: Pull DVC files for rocm-libraries
      run: |
        if command -v dvc &> /dev/null; then
          echo "dvc detected"
        else
          echo "Warning, dvc not detected!"
        fi
        LOGNAME=github-runner dvc pull -v

    - name: Fetch sources
      timeout-minutes: 30
      run: |
        ./TheRock/build_tools/fetch_sources.py --jobs 12 --no-include-rocm-libraries --no-include-ml-frameworks

    # Points TheRock at this checkout (one directory above TheRock/) for the
    # rocm-libraries sources and enables HOST_ASAN for hipBLASLt only.
    - name: Configure Projects
      env:
        amdgpu_families: ${{ env.AMDGPU_FAMILIES }}
        package_version: ADHOCBUILD
        extra_cmake_options: "-DTHEROCK_ROCM_LIBRARIES_SOURCE_DIR=../ -DhipBLASLt_SANITIZER=HOST_ASAN -Drocprofiler-sdk_CMAKE_ARGS=-DROCPROFILER_BUILD_TESTS=OFF"
        BUILD_DIR: build
      run: |
        python3 TheRock/build_tools/github_actions/build_configure.py

    # `-k 0` is passed through to the underlying build tool.
    - name: Build hipBLASLt (ASAN)
      run: cmake --build TheRock/build --target hipBLASLt -- -k 0

    # Diagnostic listing only; every command is best-effort.
    - name: Report
      if: ${{ !cancelled() }}
      run: |
        echo "## hipBLASLt"
        echo "------------------------------------"
        for kind in stage dist; do
          find TheRock/build -maxdepth 6 -type d -path "*/math-libs/BLAS/hipBLASLt/${kind}" 2>/dev/null || true
        done
        find TheRock/build -maxdepth 8 -type f -name "hipblaslt-test" 2>/dev/null || true
        echo "CCache Stats:"
        echo "-------------"
        ccache -s -v || true

    # Tars the first stage/ and dist/ tree found for hipBLASLt. Missing trees
    # only produce warnings so the upload step can still run.
    - name: Package hipBLASLt trees
      if: ${{ !cancelled() }}
      run: |
        set -uo pipefail
        mkdir -p upload/hipblaslt
        found=0
        for kind in stage dist; do
          # `|| true`: GitHub's default shell runs with errexit, and with
          # pipefail a failing find (e.g. TheRock/build missing after an
          # earlier failure) would otherwise abort the step before the
          # warning branches below are reached.
          d=$(find TheRock/build -maxdepth 6 -type d -path "*/math-libs/BLAS/hipBLASLt/${kind}" 2>/dev/null | head -1 || true)
          if [ -n "$d" ] && [ -d "$d" ]; then
            echo "Packaging hipBLASLt/${kind}: $d"
            if tar -C "$d" -czf "upload/hipblaslt/hipblaslt-${kind}.tar.gz" . ; then found=1; fi
          else
            echo "::warning::no hipBLASLt/${kind} tree found"
          fi
        done
        [ "$found" = 1 ] || echo "::warning::no stage/dist tree packaged for hipBLASLt"
        ls -lh upload/hipblaslt || true

    - name: Upload hipBLASLt trees
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
      with:
        name: asan-trees-hipblaslt-${{ matrix.family }}
        path: upload/hipblaslt/*.tar.gz
        if-no-files-found: warn
        retention-days: 7
# Runs the quick hipBLASLt test suite against the ASAN trees produced by the
# build job, on a GPU runner chosen per matrix entry.
test:
  name: "Test hipBLASLt HOST_ASAN | ${{ matrix.family }} (quick)"
  needs: [setup, build]
  runs-on: ${{ matrix.runs_on }}
  timeout-minutes: 210
  # gfx942 entries are allowed to fail without failing the job.
  continue-on-error: ${{ matrix.family == 'gfx942' }}
  container:
    image: ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:4150afe4759d14822f0e3f8930e1124f26e11f68b5c7b91ec9a02b20b1ebbb98
    # Folded scalar: the lines below are joined with single spaces into one
    # option string (GPU device nodes, extra groups, isolation env file).
    options: >-
      --ipc host
      --group-add video
      --device /dev/kfd
      --device /dev/dri
      --group-add 993
      --group-add 992
      --group-add 110
      --env-file /etc/podinfo/gha-gpu-isolation-settings
  strategy:
    fail-fast: false
    matrix: ${{ fromJSON(needs.setup.outputs.test_matrix) }}
  defaults:
    run:
      # Explicit `bash` makes GitHub invoke steps with `-eo pipefail`, so a
      # failure on the left side of a pipe (see the test step) fails the step.
      shell: bash
  env:
    VENV_DIR: ${{ github.workspace }}/.venv
    ARTIFACT_RUN_ID: ${{ github.run_id }}
    OUTPUT_ARTIFACTS_DIR: "./build"
    THEROCK_BIN_DIR: "./build/bin"
    THEROCK_DIR: ${{ github.workspace }}
    AMDGPU_FAMILIES: ${{ matrix.family }}
    ARTIFACT_GROUP: ${{ matrix.family }}-asan
    TEST_COMPONENT: hipblaslt
  steps:
    # TheRock is checked out at the workspace root; rocm-libraries goes into
    # a subdirectory and supplies the test driver and LSAN suppressions.
    - name: Checkout TheRock repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        repository: "ROCm/TheRock"
        ref: ${{ needs.setup.outputs.therock_ref }}

    # Pinned to the same checkout release as every other checkout step in
    # this workflow (was v5.0.0).
    - name: Checkout rocm-libraries repository for scripts
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        path: rocm-libraries

    - name: Download hipBLASLt ASAN trees
      uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
      with:
        name: asan-trees-hipblaslt-${{ matrix.family }}
        path: asan-trees

    # dist is mandatory; stage, when present, is extracted on top of it.
    - name: Assemble runnable tree from dist (+ stage overlay)
      run: |
        set -euo pipefail
        mkdir -p "${OUTPUT_ARTIFACTS_DIR}"
        tar -C "${OUTPUT_ARTIFACTS_DIR}" -xzf "asan-trees/hipblaslt-dist.tar.gz"
        if [ -f "asan-trees/hipblaslt-stage.tar.gz" ]; then
          tar -C "${OUTPUT_ARTIFACTS_DIR}" -xzf "asan-trees/hipblaslt-stage.tar.gz"
        fi
        test -x "${THEROCK_BIN_DIR}/hipblaslt-test"
        echo "Extracted tree:"; ls -1 "${OUTPUT_ARTIFACTS_DIR}"

    # Informational only: never fails the job.
    - name: Driver / GPU sanity check
      timeout-minutes: 3
      run: |
        export PATH="${GITHUB_WORKSPACE}/build/bin:${PATH}"
        export LD_LIBRARY_PATH="${GITHUB_WORKSPACE}/build/lib:${LD_LIBRARY_PATH:-}"
        timeout -k 10 120 python3 ./build_tools/print_driver_gpu_info.py || true

    # Exports sanitizer settings to later steps through GITHUB_ENV.
    - name: ASAN runtime environment
      run: |
        set -euo pipefail
        OUT="${GITHUB_WORKSPACE}/build"
        # Locate the ASAN runtime shipped in the extracted tree. Fail loudly
        # when it is missing: `dirname ""` would otherwise succeed and put
        # "." on LD_LIBRARY_PATH, hiding the real problem until test time.
        ASAN_RT="$(find "$OUT/lib/llvm/lib/clang" -name 'libclang_rt.asan-x86_64.so' -print -quit 2>/dev/null || true)"
        if [ -z "$ASAN_RT" ]; then
          echo "::error::libclang_rt.asan-x86_64.so not found under ${OUT}/lib/llvm/lib/clang"
          exit 1
        fi
        ASAN_RT_DIR="$(dirname "$ASAN_RT")"
        LSAN_SUPP="${GITHUB_WORKSPACE}/rocm-libraries/test/therock/lsan.supp"
        {
          echo "ASAN_OPTIONS=quarantine_size_mb=600"
          echo "LSAN_OPTIONS=suppressions=${LSAN_SUPP}:print_suppressions=0"
          echo "HSA_XNACK=1"
          echo "ASAN_SYMBOLIZER_PATH=${OUT}/lib/llvm/bin/llvm-symbolizer"
          echo "LD_LIBRARY_PATH=${OUT}/lib:${ASAN_RT_DIR}:${LD_LIBRARY_PATH:-}"
        } >> "$GITHUB_ENV"

    # Single shard; output is mirrored to ./test_logs for inspection.
    - name: Test hipBLASLt (quick)
      timeout-minutes: 180
      env:
        SHARD_INDEX: 1
        TOTAL_SHARDS: 1
        TEST_TYPE: quick
      run: |
        mkdir -p ./test_logs
        python3 rocm-libraries/test/therock/test_hipblaslt.py 2>&1 | tee ./test_logs/hipblaslt-test_output.log
# Final gate: always runs, prints the aggregated results of the build and
# test jobs, and fails unless both report `success`.
summary:
  name: hipBLASLt ASAN CI Summary
  if: always()
  needs: [build, test]
  runs-on: ubuntu-24.04
  steps:
    - name: Report and gate
      env:
        BUILD: ${{ needs.build.result }}
        TEST: ${{ needs.test.result }}
      run: |
        echo "Build (hipBLASLt ASAN, gfx90a-gating): ${BUILD}"
        echo "Test (hipBLASLt ASAN, gfx90a-gating): ${TEST}"
        echo "Note: gfx942 is opt-in (ci:asan label) and non-gating."
        # Pass only when both upstream jobs succeeded; anything else
        # (failure, cancelled, skipped) is reported as an error.
        if [[ "$BUILD" == "success" && "$TEST" == "success" ]]; then
          exit 0
        fi
        echo "::error::hipBLASLt ASAN failed (build=${BUILD} test=${TEST})"
        exit 1