benchmark_gpu_coprocessor #18

Workflow file for this run

.github/workflows/benchmark_gpu_coprocessor.yml at 54c8c5e

	# Run all fhevm coprocessor benchmarks on a GPU instance on Hyperstack and return parsed results to Slab CI bot.
	name: benchmark_gpu_coprocessor

	# Triggers: manual dispatch with a selectable instance profile, plus a weekly schedule.
	on:
	  workflow_dispatch:
	    inputs:
	      # Each option is "<slab profile name> (<hardware name>)"; the parse-inputs
	      # job splits the two parts apart.
	      profile:
	        description: "Instance type"
	        required: true
	        type: choice
	        options:
	          - "l40 (n3-L40x1)"
	          - "4-l40 (n3-L40x4)"
	          - "single-h100 (n3-H100x1)"
	          - "2-h100 (n3-H100x2)"
	          - "4-h100 (n3-H100x4)"
	          - "multi-h100 (n3-H100x8)"
	          - "multi-h100-nvlink (n3-H100x8-NVLink)"
	          - "multi-h100-sxm5 (n3-H100-SXM5x8)"
	          - "multi-h100-sxm5_fallback (n3-H100-SXM5x8)"

	  schedule:
	    # Weekly tests, every Saturday @ 1AM (GitHub evaluates cron schedules in UTC)
	    - cron: "0 1 * * 6"

	# Workflow-wide default token scope: read-only access to repository contents.
	permissions:
	  contents: read

	# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning

	# Environment shared by every job in this workflow.
	env:
	  CARGO_TERM_COLOR: always
	  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
	  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
	  RUST_BACKTRACE: "full"
	  RUST_MIN_STACK: "8388608"
	  # Fall back to the default workflow token when no dedicated checkout token is configured.
	  CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
	  # Profile used when the workflow is started by the schedule instead of manually.
	  PROFILE_SCHEDULED_RUN: "multi-h100-sxm5 (n3-H100-SXM5x8)"
	  PROFILE_MANUAL_RUN: ${{ inputs.profile }}
	  IS_MANUAL_RUN: ${{ github.event_name == 'workflow_dispatch' }}
	  # Default only: the profiling and benchmark steps set BENCHMARK_TYPE inline.
	  BENCHMARK_TYPE: "ALL"
	  OPTIMIZATION_TARGET: "throughput"
	  BATCH_SIZE: "5000"
	  SCHEDULING_POLICY: "MAX_PARALLELISM"
	  # Interpolated into the make target name: benchmark_${BENCHMARKS}_gpu.
	  BENCHMARKS: "erc20"
	  BRANCH_NAME: ${{ github.ref_name }}
	  COMMIT_SHA: ${{ github.sha }}
	  SLAB_SECRET: ${{ secrets.JOB_SECRET }}

	# Pipeline: parse-inputs -> setup-instance -> benchmark-gpu -> teardown-instance.
	jobs:
	  # Splits the selected "<profile> (<hardware>)" string into its two parts.
	  parse-inputs:
	    name: benchmark_gpu_coprocessor/parse-inputs
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	    outputs:
	      profile: ${{ steps.parse_profile.outputs.profile }}
	      hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
	    steps:
	      - name: Parse profile
	        id: parse_profile
	        run: |
	          if [[ ${IS_MANUAL_RUN} == true ]]; then
	            PROFILE_RAW="${PROFILE_MANUAL_RUN}"
	          else
	            PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
	          fi
	          # Keep everything before the trailing " (<hardware>)" part,
	          # e.g. "l40 (n3-L40x1)" -> "l40".
	          # shellcheck disable=SC2001
	          PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|\(.*\)[[:space:]](.*)|\1|')
	          echo "profile=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"

	      - name: Parse hardware name
	        id: parse_hardware_name
	        run: |
	          if [[ ${IS_MANUAL_RUN} == true ]]; then
	            PROFILE_RAW="${PROFILE_MANUAL_RUN}"
	          else
	            # Must be the same variable as in "Parse profile": no PROFILE
	            # variable is defined, so reading it yields an empty hardware name.
	            PROFILE_RAW="${PROFILE_SCHEDULED_RUN}"
	          fi
	          # Keep only the text inside the parentheses,
	          # e.g. "l40 (n3-L40x1)" -> "n3-L40x1".
	          # shellcheck disable=SC2001
	          PROFILE_VAL=$(echo "${PROFILE_RAW}" | sed 's|.*[[:space:]](\(.*\))|\1|')
	          echo "name=$PROFILE_VAL" >> "${GITHUB_OUTPUT}"

	  # Starts the remote GPU runner on Hyperstack through Slab and exposes its runner label.
	  setup-instance:
	    name: benchmark_gpu_coprocessor/setup-instance
	    needs: parse-inputs
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	    outputs:
	      runner-name: ${{ steps.start-remote-instance.outputs.label }}
	    steps:
	      - name: Start remote instance
	        id: start-remote-instance
	        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
	        with:
	          mode: start
	          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
	          slab-url: ${{ secrets.SLAB_BASE_URL }}
	          job-secret: ${{ secrets.JOB_SECRET }}
	          backend: hyperstack
	          profile: ${{ needs.parse-inputs.outputs.profile }}

	  # Builds fhevm against this tfhe-rs checkout on the remote runner, profiles and
	  # benchmarks the coprocessor, then sends the parsed results to Slab.
	  benchmark-gpu:
	    name: benchmark_gpu_coprocessor/benchmark-gpu (bpr)
	    needs: [ parse-inputs, setup-instance ]
	    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
	    continue-on-error: true
	    timeout-minutes: 720 # 12 hours
	    permissions:
	      contents: read # Needed to read repositories contents
	      packages: read # Needed to get fhevm packages
	    strategy:
	      fail-fast: false
	      # explicit include-based build matrix, of known valid options
	      matrix:
	        include:
	          - os: ubuntu-22.04
	            cuda: "12.8"
	            gcc: 11
	    env:
	      HW_NAME: "${{ needs.parse-inputs.outputs.hardware_name }}"

	    steps:
	      - name: Install git LFS
	        run: |
	          sudo apt-get remove -y unattended-upgrades
	          sudo apt-get update
	          sudo apt-get install -y git-lfs protobuf-compiler
	          git lfs install

	      - name: Checkout tfhe-rs
	        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
	        with:
	          path: tfhe-rs
	          persist-credentials: false

	      - name: Check fhEVM and TFHE-rs repos
	        run: |
	          pwd
	          ls

	      - name: Checkout fhevm
	        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
	        with:
	          repository: zama-ai/fhevm
	          persist-credentials: false
	          fetch-depth: 0
	          lfs: true
	          ref: antoniu/use-tfhe-main-benches
	          path: fhevm

	      # Exports BENCH_DATE, COMMIT_DATE and COMMIT_HASH for the "Parse results" step.
	      - name: Get benchmark details
	        run: |
	          COMMIT_DATE_ENV=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${COMMIT_SHA}")
	          {
	            echo "BENCH_DATE=$(date --iso-8601=seconds)";
	            echo "COMMIT_DATE=$COMMIT_DATE_ENV";
	            echo "COMMIT_HASH=$(git rev-parse HEAD)";
	          } >> "${GITHUB_ENV}"
	        working-directory: tfhe-rs/

	      - name: Setup Hyperstack dependencies
	        uses: ./tfhe-rs/.github/actions/gpu_setup
	        with:
	          cuda-version: ${{ matrix.cuda }}
	          gcc-version: ${{ matrix.gcc }}
	          # NOTE(review): SECRETS_AVAILABLE is not defined in this workflow's env, so
	          # this expression evaluates to false here - confirm that is intended.
	          github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}

	      # Besides listing the workspace, this moves the tfhe-rs checkout into fhevm/coprocessor/.
	      - name: Check fhEVM and TFHE-rs repos
	        run: |
	          pwd
	          ls
	          mv tfhe-rs fhevm/coprocessor/

	      - name: Checkout LFS objects
	        run: git lfs checkout
	        working-directory: fhevm/

	      - name: Install rust
	        uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
	        with:
	          toolchain: nightly

	      - name: Install cargo dependencies
	        run: |
	          sudo apt-get install -y protobuf-compiler pkg-config libssl-dev \
	            libclang-dev docker-compose-v2 docker.io acl
	          sudo usermod -aG docker "$USER"
	          newgrp docker
	          sudo setfacl --modify user:"$USER":rw /var/run/docker.sock
	          cargo install sqlx-cli

	      - name: Install foundry
	        uses: foundry-rs/foundry-toolchain@50d5a8956f2e319df19e6b57539d7e2acb9f8c1e

	      - name: Cache cargo
	        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
	        with:
	          path: |
	            ~/.cargo/registry
	            ~/.cargo/git
	            target
	          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
	          restore-keys: ${{ runner.os }}-cargo-

	      - name: Login to GitHub Container Registry
	        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
	        with:
	          registry: ghcr.io
	          username: ${{ github.actor }}
	          password: ${{ secrets.GITHUB_TOKEN }}

	      - name: Login to Chainguard Registry
	        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
	        with:
	          registry: cgr.dev
	          username: ${{ secrets.CGR_USERNAME }}
	          password: ${{ secrets.CGR_PASSWORD }}

	      - name: Init database
	        run: make init_db
	        working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker

	      - name: Use Node.js
	        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
	        with:
	          node-version: 20.x

	      - name: Build contracts
	        env:
	          HARDHAT_NETWORK: hardhat
	        run: |
	          ls
	          pwd
	          cp ./host-contracts/.env.example ./host-contracts/.env
	          cd ./host-contracts
	          npm ci --include=optional
	          npm install && npm run deploy:emptyProxies && npx hardhat compile
	        working-directory: fhevm/

	      - name: Profile erc20 no-cmux benchmark on GPU
	        run: |
	          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" \
	          FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" \
	          BENCHMARK_TYPE="THROUGHPUT_200" \
	          OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" \
	          make -e "profile_erc20_gpu"
	        working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker

	      # The file name carries the current date and hour.
	      - name: Get nsys profile name
	        id: nsys_profile_name
	        run: echo "profile=coprocessor_profile_$(date +"%Y-%m-%d-%Hh").nsys-rep" >> "$GITHUB_OUTPUT"

	      # The report name is read from the step environment rather than being
	      # template-expanded into the script text.
	      - name: Timestamp nsys profile
	        env:
	          REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
	        run: |
	          mv report1.nsys-rep "${REPORT_NAME}"
	        working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker

	      - name: Upload profile artifact
	        env:
	          REPORT_NAME: ${{ steps.nsys_profile_name.outputs.profile }}
	        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
	        with:
	          name: ${{ env.REPORT_NAME }}
	          path: fhevm/coprocessor/fhevm-engine/tfhe-worker/${{ env.REPORT_NAME }}

	      - name: Run latency benchmark on GPU
	        run: |
	          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="LATENCY" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
	        working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker

	      - name: Run throughput benchmarks on GPU
	        run: |
	          BENCHMARK_BATCH_SIZE="${BATCH_SIZE}" FHEVM_DF_SCHEDULE="${SCHEDULING_POLICY}" BENCHMARK_TYPE="THROUGHPUT_200" OPTIMIZATION_TARGET="${OPTIMIZATION_TARGET}" make -e "benchmark_${BENCHMARKS}_gpu"
	        working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker

	      # Writes ${RESULTS_FILENAME} into fhevm/ (the step's working directory).
	      - name: Parse results
	        run: |
	          python3 ./ci/benchmark_parser.py coprocessor/fhevm-engine/target/criterion "${RESULTS_FILENAME}" \
	            --database coprocessor \
	            --hardware "${HW_NAME}" \
	            --backend gpu \
	            --project-version "${COMMIT_HASH}" \
	            --branch "${BRANCH_NAME}" \
	            --commit-date "${COMMIT_DATE}" \
	            --bench-date "${BENCH_DATE}" \
	            --walk-subdirs \
	            --crate "coprocessor/fhevm-engine/tfhe-worker" \
	            --name-suffix "operation_batch_size_${BATCH_SIZE}-schedule_${SCHEDULING_POLICY}-optimization_target_${OPTIMIZATION_TARGET}"
	        working-directory: fhevm/

	      # `with:` values are not run through a shell, so workflow env values must be
	      # read through the `env` expression context rather than ${VAR} syntax.
	      - name: Upload parsed results artifact
	        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
	        with:
	          name: ${{ env.COMMIT_SHA }}_${{ env.BENCHMARKS }}_${{ needs.parse-inputs.outputs.profile }}
	          path: fhevm/${{ env.RESULTS_FILENAME }}

	      - name: Checkout Slab repo
	        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
	        with:
	          repository: zama-ai/slab
	          path: slab
	          persist-credentials: false
	          token: ${{ secrets.REPO_CHECKOUT_TOKEN }}

	      - name: Send data to Slab
	        shell: bash
	        env:
	          SLAB_URL: ${{ secrets.SLAB_URL }}
	        run: |
	          python3 slab/scripts/data_sender.py fhevm/"${RESULTS_FILENAME}" "${SLAB_SECRET}" \
	            --slab-url "${SLAB_URL}"

	  # Stops the remote instance whenever it was started, even if the benchmark job failed.
	  teardown-instance:
	    name: benchmark_gpu_coprocessor/teardown-instance
	    if: ${{ always() && needs.setup-instance.result == 'success' }}
	    needs: [ setup-instance, benchmark-gpu ]
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	    steps:
	      - name: Stop remote instance
	        id: stop-instance
	        uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
	        with:
	          mode: stop
	          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
	          slab-url: ${{ secrets.SLAB_BASE_URL }}
	          job-secret: ${{ secrets.JOB_SECRET }}
	          label: ${{ needs.setup-instance.outputs.runner-name }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

benchmark_gpu_coprocessor #18

Workflow file

benchmark_gpu_coprocessor #18

Uh oh!

Jobs

Run details

Workflow file for this run