Skip to content

benchmark_gpu_coprocessor #7

benchmark_gpu_coprocessor

benchmark_gpu_coprocessor #7

# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
name: benchmark_gpu_coprocessor
on:
workflow_dispatch:
inputs:
profile:
description: "Instance type"
required: true
type: choice
options:
- "l40 (n3-L40x1)"
- "4-l40 (n3-L40x4)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
- "multi-h100-sxm5_fallback (n3-H100x8-SXM5)"
schedule:
# Weekly tests @ 1AM
- cron: "0 1 * * 6"
permissions:
contents: read
env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100x8-SXM5)"
PROFILE_MANUAL_RUN: ${{ inputs.profile }}
IS_MANUAL_RUN: ${{ github.event_name == 'workflow_dispatch' }}
BENCHMARK_TYPE: "ALL"
OPTIMIZATION_TARGET: "throughput"
BATCH_SIZE: "5000"
SCHEDULING_POLICY: "MAX_PARALLELISM"
BENCHMARKS: "erc20"
BRANCH_NAME: ${{ github.ref_name }}
COMMIT_SHA: ${{ github.sha }}
SLAB_SECRET: ${{ secrets.JOB_SECRET }}
jobs:
parse-inputs:
name: benchmark_gpu_coprocessor/parse-inputs
runs-on: ubuntu-latest
permissions:
contents: 'read'
outputs:
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
steps:
- name: Parse profile
id: parse_profile
run: |
if [[ ${IS_MANUAL_RUN} == true ]]; then
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
else
PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
fi
# shellcheck disable=SC2001
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
- name: Parse hardware name
id: parse_hardware_name
run: |
if [[ ${IS_MANUAL_RUN} == true ]]; then
PROFILE_RAW="${PROFILE_MANUAL_RUN}"
else
PROFILE_RAW="${PROFILE}"
fi
# shellcheck disable=SC2001
PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"
setup-instance:
name: benchmark_gpu_coprocessor/setup-instance
needs: parse-inputs
runs-on: ubuntu-latest
permissions:
contents: 'read'
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label }}
steps:
- name: Start remote instance
id: start-remote-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: ${{ needs.parse-inputs.outputs.profile }}
benchmark-gpu:
name: benchmark_gpu_coprocessor/benchmark-gpu (bpr)
needs: [ parse-inputs, setup-instance ]
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
continue-on-error: true
timeout-minutes: 720 # 12 hours
permissions:
contents: 'read'
packages: 'read'
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.8"
gcc: 11
env:
HW_NAME: "${{ needs.parse-inputs.outputs.hardware_name }}"
steps:
- name: Install git LFS
run: |
sudo apt-get remove -y unattended-upgrades
sudo apt-get update
sudo apt-get install -y git-lfs protobuf-compiler
git lfs install
- name: Checkout tfhe-rs
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
path: tfhe-rs
persist-credentials: false
- name: Check fhEVM and TFHE-rs repos
run: |
pwd
ls
- name: Checkout fhevm
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
repository: zama-ai/fhevm
persist-credentials: 'false'
fetch-depth: 0
lfs: true
ref: antoniu/use-tfhe-main-benches
path: fhevm
- name: Get benchmark details
run: |
COMMIT_DATE_ENV=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${COMMIT_SHA}")
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$COMMIT_DATE_ENV";
echo "COMMIT_HASH=$(git rev-parse HEAD)";
} >> "${GITHUB_ENV}"
working-directory: tfhe-rs/
- name: Setup Hyperstack dependencies
uses: ./tfhe-rs/.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Check fhEVM and TFHE-rs repos
run: |
pwd
ls
mv tfhe-rs fhevm/coprocessor/
- name: Checkout LFS objects
run: git lfs checkout
working-directory: fhevm/
- name: Install rust
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
- name: Install cargo dependencies
run: |
sudo apt-get install -y protobuf-compiler pkg-config libssl-dev \
libclang-dev docker-compose-v2 docker.io acl
sudo usermod -aG docker "$USER"
newgrp docker
sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
cargo install sqlx-cli
- name: Install foundry
uses: foundry-rs/foundry-toolchain@50d5a8956f2e319df19e6b57539d7e2acb9f8c1e
- name: Cache cargo
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
- name: Login to GitHub Container Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Chainguard Registry
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
with:
registry: cgr.dev
username: ${{ secrets.CGR_USERNAME }}
password: ${{ secrets.CGR_PASSWORD }}
- name: Init database
run: make init_db
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Use Node.js
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0
with:
node-version: 20.x
- name: Build contracts
env:
HARDHAT_NETWORK: hardhat
run: |
ls
pwd
cp ./host-contracts/.env.example ./host-contracts/.env
cd ./host-contracts
npm ci --include=optional
npm install && npm run deploy:emptyProxies && npx hardhat compile
working-directory: fhevm/
- name: Profile erc20 no-cmux benchmark on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
BENCHMARK_TYPE="THROUGHPUT_200" \
OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
make -e "profile_erc20_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Get nsys profile name
id: nsys_profile_name
run: echo "profile=coprocessor_profile_$(date +"%Y-%m-%d-%Hh").nsys-rep" >> "$GITHUB_OUTPUT"
- name: Timestamp nsys profile # zizmor: ignore[template-injection]
env:
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
run: |
mv report1.nsys-rep ${{ env.REPORT_NAME }}
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Upload profile artifact
env:
REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${{ env.REPORT_NAME }}
path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}
- name: Run latency benchmark on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Run throughput benchmarks on GPU
run: |
BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py coprocessor/fhevm-engine/target/criterion "${RESULTS_FILENAME}" \
--database coprocessor \
--hardware "${HW_NAME}" \
--backend gpu \
--project-version "${COMMIT_HASH}" \
--branch "${BRANCH_NAME}" \
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
--crate "coprocessor/fhevm-engine/tfhe-worker" \
--name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
working-directory: fhevm/
- name: Upload parsed results artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${COMMIT_SHA}_${BENCHMARKS}_${{ needs.parse-inputs.outputs.profile }}
path: fhevm/$${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
with:
repository: zama-ai/slab
path: slab
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Send data to Slab
shell: bash
env:
SLAB_URL: ${{ secrets.SLAB_URL }}
run: |
python3 slab/scripts/data_sender.py fhevm/"${RESULTS_FILENAME}" "${SLAB_SECRET}" \
--slab-url "${SLAB_URL}"
teardown-instance:
name: benchmark_gpu_coprocessor/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, benchmark-gpu ]
runs-on: ubuntu-latest
permissions:
contents: 'read'
steps:
- name: Stop remote instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}