Skip to content

Commit 4bf9f25

Browse files
committed
refine benchmark_flux.sh
1 parent 9e43e20 commit 4bf9f25

File tree

2 files changed

+68
-54
lines changed

2 files changed

+68
-54
lines changed

onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark_flux.sh

Lines changed: 67 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -4,113 +4,128 @@
44
# Licensed under the MIT License.
55
# -------------------------------------------------------------------------
66

7-
# Please run this script under a conda or virtual environment with Python 3.10, 3.11 or 3.12.
set -euo pipefail

# Script to benchmark Flux models with ONNX and PyTorch
# Usage: bash benchmark_flux.sh <install_dir> <onnx_dir>

# Validate inputs and environment before doing any work.
command -v python3 &>/dev/null || { echo "Python3 is required but not installed."; exit 1; }
command -v wget &>/dev/null || { echo "wget is required but not installed."; exit 1; }

# Input arguments with defaults.
install_dir="${1:-$HOME}"     # root directory for toolkits and source checkouts
onnx_dir="${2:-onnx_models}"  # root directory for exported ONNX models and logs

# GPU settings: pin all work to the first visible GPU.
export CUDA_VISIBLE_DEVICES=0

# Print a green "[INFO]"-prefixed message to stdout.
# printf is used instead of `echo -e` because echo -e is non-portable and
# would also expand backslash escape sequences occurring inside the message
# itself; printf with a %s format leaves the message text untouched.
# Arguments: $1 - message text
log() {
  printf '\033[1;32m[INFO]\033[0m %s\n' "$1"
}

# Download and silently install the CUDA 12.6 toolkit under
# $install_dir/cuda12.6, then prepend its bin/ and lib64/ directories to
# PATH and LD_LIBRARY_PATH for the rest of this script.
install_cuda_12() {
  log "Installing CUDA 12.6"
  pushd "$install_dir"
  wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
  sh cuda_12.6.2_560.35.03_linux.run --toolkit --toolkitpath="$install_dir/cuda12.6" --silent --override --no-man-page
  export PATH="$install_dir/cuda12.6/bin:$PATH"
  export LD_LIBRARY_PATH="$install_dir/cuda12.6/lib64:$LD_LIBRARY_PATH"
  popd
}

# Download cuDNN 9.6, unpack it into $install_dir/cudnn9.6 (stripping the
# archive's top-level directory), and put its lib/ on LD_LIBRARY_PATH.
install_cudnn_9() {
  log "Installing cuDNN 9.6"
  pushd "$install_dir"
  wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz
  mkdir -p "$install_dir/cudnn9.6"
  tar -Jxvf cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz -C "$install_dir/cudnn9.6" --strip=1
  export LD_LIBRARY_PATH="$install_dir/cudnn9.6/lib:$LD_LIBRARY_PATH"
  popd
}

# Clone (if needed) and pip-install Hugging Face Optimum from source.
# Skips the editable install when an optimum package is already present.
install_optimum() {
  log "Installing Optimum"
  # local: keep the path from leaking into the script's global scope.
  local optimum_dir="$install_dir/optimum"
  if [ ! -d "$optimum_dir" ]; then
    git clone https://github.com/huggingface/optimum "$optimum_dir"
  fi
  pushd "$optimum_dir"
  pip show optimum &>/dev/null || pip install -e .
  popd
}

# Build ONNX Runtime from source with CUDA 12.6 / cuDNN 9.6 for the compute
# capability of the local GPU, then install the resulting wheel.
install_onnxruntime() {
  log "Building ONNX Runtime"
  pushd "$install_dir"
  if [ ! -d onnxruntime ]; then
    git clone https://github.com/microsoft/onnxruntime
  fi
  pushd onnxruntime

  # Detect GPU 0's compute capability, e.g. "86" for sm_86.
  # `|| true` is required: under `set -e` a failing probe (no CUDA device,
  # or torch not importable) would otherwise abort the whole script at this
  # assignment, and the explicit error message below would never be printed.
  CUDA_ARCH=$(python3 -c "import torch; cc = torch.cuda.get_device_capability(); print(f'{cc[0]}{cc[1]}')" 2>/dev/null || true)
  if [ -z "$CUDA_ARCH" ]; then
    echo "No CUDA device found."
    exit 1
  fi

  pip install --upgrade pip cmake psutil setuptools wheel packaging ninja numpy==2.2
  sh build.sh --config Release --build_dir build/cuda12 --parallel \
    --use_cuda --cuda_version 12.6 --cuda_home "$install_dir/cuda12.6" \
    --cudnn_home "$install_dir/cudnn9.6" \
    --build_wheel --skip_tests \
    --cmake_generator Ninja \
    --compile_no_warning_as_error \
    --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF CMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH"

  log "Installing ONNX Runtime"
  pip install build/cuda12/Release/dist/onnxruntime_gpu-*-linux_x86_64.whl
  popd
  popd
}

# Install all GPU dependencies: CUDA toolkit, cuDNN, PyTorch, pinned Python
# packages, ONNX Runtime (built from source) and Optimum (from source).
install_gpu() {
  log "Installing GPU dependencies"
  if [ ! -d "$install_dir/cuda12.6" ]; then
    install_cuda_12
  fi
  if [ ! -d "$install_dir/cudnn9.6" ]; then
    install_cudnn_9
  fi
  pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124
  pip install diffusers==0.32.0 transformers==4.46.3 onnx==1.17.0 protobuf==5.29.2 py3nvml
  install_onnxruntime
  install_optimum
}

# Export a model to ONNX (once), derive fp16 variants (once), then run the
# optimum and torch benchmark configurations at 1024x1024.
# Arguments:
#   $1 - HF model id   $2 - output directory   $3 - version tag
#   $4 - denoising steps   $5 - batch size
run_benchmark() {
  local model=$1
  local dir=$2
  local version=$3
  local steps=$4
  local batch=$5

  log "Running benchmark for model: $model"
  mkdir -p "$dir"

  # Export / optimize only when the target directory does not exist yet,
  # so repeated runs reuse previous artifacts.
  if [ ! -d "$dir/fp32" ]; then
    optimum-cli export onnx --model "$model" "$dir/fp32" --opset 15 --task text-to-image
  fi
  if [ ! -d "$dir/fp16_fp32" ]; then
    python optimize_pipeline.py -i "$dir/fp32" -o "$dir/fp16_fp32" --float16
  fi
  if [ ! -d "$dir/fp16_bf16" ]; then
    python optimize_pipeline.py -i "$dir/fp32" -o "$dir/fp16_bf16" --float16 --bfloat16
  fi

  # NOTE(review): these invoke `python` although the preamble only validates
  # `python3` — assumed equivalent inside the conda/venv; confirm.
  python benchmark.py -e optimum --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" -p "$dir/fp16_fp32"
  python benchmark.py -e optimum --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" -p "$dir/fp16_bf16"
  python benchmark.py -e torch --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version"
  python benchmark.py -e torch --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" --enable_torch_compile
}

# ---- Main script execution ----
install_gpu

log "Creating ONNX model directory: $onnx_dir"
mkdir -p "$onnx_dir"

# Each run's stdout is captured to a per-model log file under $onnx_dir.
run_benchmark black-forest-labs/FLUX.1-schnell "$onnx_dir/flux1_schnell" Flux.1S 4 1 > "$onnx_dir/flux1_schnell_s4_b1.log"
run_benchmark black-forest-labs/FLUX.1-dev "$onnx_dir/flux1_dev" Flux.1D 50 1 > "$onnx_dir/flux1_dev_s50_b1.log"
run_benchmark stabilityai/stable-diffusion-3.5-large "$onnx_dir/sd3.5_large" 3.5L 50 1 > "$onnx_dir/sd3.5_large_s50_b1.log"
run_benchmark stabilityai/stable-diffusion-3.5-medium "$onnx_dir/sd3.5_medium" 3.5M 50 1 > "$onnx_dir/sd3.5_medium_s50_b1.log"

log "Benchmark completed."

onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,7 @@ def _optimize_sd_pipeline(
163163
# export ORT_DEBUG_NODE_IO_DUMP_INPUT_DATA=1
164164
# export ORT_DEBUG_NODE_IO_DUMP_OUTPUT_DATA=1
165165
# python benchmark.py --height 1024 --width 1024 --steps 4 -b 1 -v Flux.1S -p flux1_schnell_onnx/fp32_opt -e optimum >stdout.txt 2>stderr.txt
166-
# Warning: The node name might change in different export settings. We used python 3.10 and the following packages:
167-
# diffusers==0.31.0 transformers==4.46.3 optimum==1.24.0.dev0 torch==2.5.1 onnx==1.17.0 protobuf==5.29.2
166+
# Warning: The node name might change in different export settings. See benchmark_flux.sh for the settings.
168167
flux_node_block_list = {
169168
"text_encoder_2": [
170169
"/encoder/block.10/layer.1/DenseReluDense/wo/MatMul",

0 commit comments

Comments
 (0)