|
4 | 4 | # Licensed under the MIT License. |
5 | 5 | # ------------------------------------------------------------------------- |
6 | 6 |
|
7 | | -# Please run this script under conda or virtual environment with Python 3.10, 3.11 or 3.12. |
8 | | -# bash benchmark_flux.sh <install_dir> <onnx_dir> |
| 7 | +set -euo pipefail |
9 | 8 |
|
10 | | -# Installation directory (default: $HOME) |
11 | | -install_dir="${1:-$HOME}" |
| 9 | +# Script to benchmark Flux and Stable Diffusion 3.5 models with ONNX Runtime and PyTorch
| 10 | +# Usage: bash benchmark_flux.sh <install_dir> <onnx_dir> |
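| | +# Example: bash benchmark_flux.sh "$HOME" onnx_models   (both arguments are optional; these are the defaults)
| | +# Run under a conda or virtual environment with Python 3.10, 3.11, or 3.12.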
| 11 | + |
| 12 | +# Validate inputs and environment |
| 13 | +command -v python3 &>/dev/null || { echo "python3 is required but not installed." >&2; exit 1; }
| 14 | +command -v wget &>/dev/null || { echo "wget is required but not installed." >&2; exit 1; }
| | +command -v git &>/dev/null || { echo "git is required but not installed." >&2; exit 1; }
12 | 15 |
|
13 | | -# Root directory for the onnx models |
| 16 | +# Input arguments with defaults |
| 17 | +install_dir="${1:-$HOME}" |
14 | 18 | onnx_dir="${2:-onnx_models}" |
15 | 19 |
|
16 | | -# Which GPU to use |
| 20 | +# GPU settings |
17 | 21 | export CUDA_VISIBLE_DEVICES=0 |
18 | 22 |
|
19 | | -# Function to install CUDA 12.6 |
20 | | -install_cuda_12() |
21 | | -{ |
22 | | - pushd $install_dir |
23 | | - wget https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run |
24 | | - sh cuda_12.6.2_560.35.03_linux.run --toolkit --toolkitpath=$install_dir/cuda12.6 --silent --override --no-man-page |
| 23 | +# Function to log messages |
| 24 | +log() { |
| 25 | + echo -e "\033[1;32m[INFO]\033[0m $1" |
| 26 | +} |
25 | 27 |
|
| 28 | +# Function to install CUDA 12.6 |
| 29 | +install_cuda_12() { |
| 30 | + log "Installing CUDA 12.6" |
| 31 | + pushd "$install_dir" |
| 32 | + wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run |
| 33 | + sh cuda_12.6.2_560.35.03_linux.run --toolkit --toolkitpath="$install_dir/cuda12.6" --silent --override --no-man-page |
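| | +    # Exports below persist after the function returns, since functions run in the current shell, not a subshell.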
26 | 34 | export PATH="$install_dir/cuda12.6/bin:$PATH" |
27 | | -    export LD_LIBRARY_PATH="$install_dir/cuda12.6/lib64:$LD_LIBRARY_PATH"
| 35 | +    export LD_LIBRARY_PATH="$install_dir/cuda12.6/lib64:${LD_LIBRARY_PATH:-}"
28 | 36 | popd |
29 | 37 | } |
30 | 38 |
|
31 | 39 | # Function to install cuDNN 9.6 |
32 | 40 | install_cudnn_9() { |
| 41 | + log "Installing cuDNN 9.6" |
33 | 42 | pushd "$install_dir" |
34 | | - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz |
| 43 | + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz |
35 | 44 | mkdir -p "$install_dir/cudnn9.6" |
36 | | - tar -Jxvf cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz -C "$install_dir/cudnn9.6"--strip=1 |
37 | | - export LD_LIBRARY_PATH="$install_dir/cudnn9.5/lib:$LD_LIBRARY_PATH" |
| 45 | +    tar -Jxvf cudnn-linux-x86_64-9.6.0.74_cuda12-archive.tar.xz -C "$install_dir/cudnn9.6" --strip-components=1
| 46 | +    export LD_LIBRARY_PATH="$install_dir/cudnn9.6/lib:${LD_LIBRARY_PATH:-}"
38 | 47 | popd |
39 | 48 | } |
40 | 49 |
|
41 | | -# Install optimum from source before 1.24 is released |
| 50 | +# Function to install optimum from source (until the 1.24 release is available)
42 | 51 | install_optimum() { |
43 | | - pushd "$install_dir" |
| 52 | + log "Installing Optimum" |
44 | 53 | optimum_dir="$install_dir/optimum" |
45 | 54 | if [ ! -d "$optimum_dir" ]; then |
46 | | - git clone https://github.com/huggingface/optimum |
| 55 | + git clone https://github.com/huggingface/optimum "$optimum_dir" |
47 | 56 | fi |
48 | | - cd "$sam2_dir" |
49 | | - pip show optimum > /dev/null 2>&1 || pip install -e . |
| 57 | + pushd "$optimum_dir" |
| 58 | + pip show optimum &>/dev/null || pip install -e . |
50 | 59 | popd |
51 | 60 | } |
52 | 61 |
|
53 | | -# Install onnxruntime-gpu from source before 1.21 is released |
| 62 | +# Function to build and install ONNX Runtime from source (until the 1.21 release is available)
54 | 63 | install_onnxruntime() { |
| 64 | + log "Building ONNX Runtime" |
55 | 65 | pushd "$install_dir" |
56 | | - if ! [ -d onnxruntime ]; then |
| 66 | + if [ ! -d onnxruntime ]; then |
57 | 67 | git clone https://github.com/microsoft/onnxruntime |
58 | 68 | fi |
59 | | - cd onnxruntime |
| 69 | + pushd onnxruntime |
60 | | -    CUDA_ARCH=$(python3 -c "import torch; cc = torch.cuda.get_device_capability(); print(f'{cc[0]}{cc[1]}')")
| 70 | +    CUDA_ARCH=$(python3 -c "import torch; cc = torch.cuda.get_device_capability(); print(f'{cc[0]}{cc[1]}')" || true)
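| | +    # The query prints the compute capability without the dot (e.g. "90" for sm_90); "|| true" leaves CUDA_ARCH empty on failure so the guard below reports it instead of set -e aborting mid-substitution.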
61 | | - if [ -n "$CUDA_ARCH" ]; then |
62 | | - pip install --upgrade pip cmake psutil setuptools wheel packaging ninja numpy==2.2 |
63 | | - sh build.sh --config Release --build_dir build/cuda12 --build_shared_lib --parallel \ |
64 | | - --use_cuda --cuda_version 12.6 --cuda_home $install_dir/cuda12.6 \ |
65 | | - --cudnn_home $install_dir/cudnn9.6 \ |
66 | | - --build_wheel --skip_tests \ |
67 | | - --cmake_generator Ninja \ |
68 | | - --compile_no_warning_as_error \ |
69 | | - --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH |
70 | | - pip install build/cuda12/Release/dist/onnxruntime_gpu-*-linux_x86_64.whl |
71 | | - else |
| 71 | + if [ -z "$CUDA_ARCH" ]; then |
72 | 72 | echo "No CUDA device found." |
73 | 73 | exit 1 |
74 | 74 | fi |
| 75 | + pip install --upgrade pip cmake psutil setuptools wheel packaging ninja numpy==2.2 |
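| | +    # Build only for the detected GPU architecture and skip unit tests to keep build time down.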
| 76 | + sh build.sh --config Release --build_dir build/cuda12 --parallel \ |
| 77 | + --use_cuda --cuda_version 12.6 --cuda_home "$install_dir/cuda12.6" \ |
| 78 | + --cudnn_home "$install_dir/cudnn9.6" \ |
| 79 | + --build_wheel --skip_tests \ |
| 80 | + --cmake_generator Ninja \ |
| 81 | + --compile_no_warning_as_error \ |
| 82 | + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF CMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH" |
| 83 | + |
| 84 | + log "Installing ONNX Runtime" |
| 85 | + pip install build/cuda12/Release/dist/onnxruntime_gpu-*-linux_x86_64.whl |
| 86 | + popd |
75 | 87 | popd |
76 | 88 | } |
77 | 89 |
|
78 | | -# Install GPU dependencies |
| 90 | +# Function to install GPU dependencies |
79 | 91 | install_gpu() { |
| 92 | + log "Installing GPU dependencies" |
80 | 93 | [ ! -d "$install_dir/cuda12.6" ] && install_cuda_12 |
81 | 94 | [ ! -d "$install_dir/cudnn9.6" ] && install_cudnn_9 |
82 | 95 | pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124 |
83 | | - |
84 | | - pip install diffusers==0.31.0 transformers==4.46.3 onnx==1.17.0 protobuf==5.29.2 |
85 | | - |
| 96 | + pip install diffusers==0.32.0 transformers==4.46.3 onnx==1.17.0 protobuf==5.29.2 py3nvml |
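| | +    # Note: cu124 torch wheels bundle their own CUDA runtime; the local CUDA 12.6 toolkit is needed to build onnxruntime from source below.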
86 | 97 | install_onnxruntime |
87 | | - |
88 | 98 | install_optimum |
89 | 99 | } |
90 | 100 |
|
| 101 | +# Function to run benchmarks |
91 | 102 | run_benchmark() { |
92 | 103 | local model=$1 |
93 | 104 | local dir=$2 |
94 | 105 | local version=$3 |
95 | 106 | local steps=$4 |
96 | 107 | local batch=$5 |
97 | 108 |
|
98 | | - mkdir -p $dir |
99 | | - [ ! -d "$dir/fp32" ] && optimum-cli export onnx --model $model $dir/fp32 --opset 15 --task text-to-image |
100 | | - [ ! -d "$dir/fp16_fp32" ] && python optimize_pipeline.py -i $dir/fp32 -o $dir/fp16_fp32 --float16 |
101 | | - [ ! -d "$dir/fp16_bf16" ] && python optimize_pipeline.py -i $dir/fp32 -o $dir/fp16_bf16 --float16 --bfloat16 |
102 | | - python benchmark.py -e optimum --height 1024 --width 1024 --steps $steps -b $batch -v $version -p $dir/fp16_fp32 |
103 | | - python benchmark.py -e optimum --height 1024 --width 1024 --steps $steps -b $batch -v $version -p $dir/fp16_bf16 |
104 | | - python benchmark.py -e torch --height 1024 --width 1024 --steps $steps -b $batch -v $version |
105 | | - python benchmark.py -e torch --height 1024 --width 1024 --steps $steps -b $batch -v $version --enable_torch_compile |
| 109 | + log "Running benchmark for model: $model" |
| 110 | + mkdir -p "$dir" |
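| | +    # Export the FP32 ONNX pipeline once, then derive the FP16 variants; each step is skipped when its output directory already exists.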
| 111 | + [ ! -d "$dir/fp32" ] && optimum-cli export onnx --model "$model" "$dir/fp32" --opset 15 --task text-to-image |
| 112 | + [ ! -d "$dir/fp16_fp32" ] && python optimize_pipeline.py -i "$dir/fp32" -o "$dir/fp16_fp32" --float16 |
| 113 | + [ ! -d "$dir/fp16_bf16" ] && python optimize_pipeline.py -i "$dir/fp32" -o "$dir/fp16_bf16" --float16 --bfloat16 |
| 114 | + python benchmark.py -e optimum --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" -p "$dir/fp16_fp32" |
| 115 | + python benchmark.py -e optimum --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" -p "$dir/fp16_bf16" |
| 116 | + python benchmark.py -e torch --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" |
| 117 | + python benchmark.py -e torch --height 1024 --width 1024 --steps "$steps" -b "$batch" -v "$version" --enable_torch_compile |
106 | 118 | } |
107 | 119 |
|
| 120 | +# Main script execution |
108 | 121 | install_gpu |
109 | 122 |
|
110 | | -mkdir -p $root_dir |
| 123 | +log "Creating ONNX model directory: $onnx_dir" |
| 124 | +mkdir -p "$onnx_dir" |
111 | 125 |
|
112 | | -run_benchmark black-forest-labs/FLUX.1-schnell ${root_dir}/flux1_schnell Flux.1S 4 1 > $root_dir/flux1_schnell_s4_b1.log |
113 | | -run_benchmark black-forest-labs/FLUX.1-dev ${root_dir}/flux1_dev Flux.1D 50 1 > $root_dir/flux1_dev_s50_b1.log |
| 126 | +run_benchmark black-forest-labs/FLUX.1-schnell "$onnx_dir/flux1_schnell" Flux.1S 4 1 > "$onnx_dir/flux1_schnell_s4_b1.log" |
| 127 | +run_benchmark black-forest-labs/FLUX.1-dev "$onnx_dir/flux1_dev" Flux.1D 50 1 > "$onnx_dir/flux1_dev_s50_b1.log" |
| 128 | +run_benchmark stabilityai/stable-diffusion-3.5-large "$onnx_dir/sd3.5_large" 3.5L 50 1 > "$onnx_dir/sd3.5_large_s50_b1.log" |
| 129 | +run_benchmark stabilityai/stable-diffusion-3.5-medium "$onnx_dir/sd3.5_medium" 3.5M 50 1 > "$onnx_dir/sd3.5_medium_s50_b1.log" |
114 | 130 |
|
115 | | -run_benchmark stabilityai/stable-diffusion-3.5-large ${root_dir}/sd3.5_large 3.5L 50 1 > $root_dir/sd3.5_large_s50_b1.log |
116 | | -run_benchmark stabilityai/stable-diffusion-3.5-medium ${root_dir}/sd3.5_medium 3.5M 50 1 > $root_dir/sd3.5_medium_s50_b1.log |
| 131 | +log "Benchmark completed." |