# Benchmark quantization-system performance on image-classification models
# with the ILSVRC2012 (ImageNet) dataset.

# Directory that should contain the model files (.onnx).
MODEL_DIR = 'QuantZoo/Model/Imagenet'

# Directories that should contain the calibration and test image folders.
CALIB_DIR = 'QuantZoo/Data/Imagenet/Calib'
TEST_DIR = 'QuantZoo/Data/Imagenet/Test'

# Batch size used for both calibration and testing.
BATCHSIZE = 32

# Quantizer configuration.
SYMMETRICAL = True
PER_CHANNEL = True
POWER_OF_2 = False
BIT_WIDTH = 8

# Reports are written to this directory.
REPORT_DIR = 'QuantZoo/Reports'
# Per-model benchmark settings. Each entry names an ONNX model (file stem,
# without the '.onnx' suffix, resolved under MODEL_DIR) and the output
# variable(s) monitored during error analysis.
CONFIGS = [
    {'Model': 'efficientnet_v1_b0',
     'Output': ['/features/features.8/features.8.2/Mul_output_0']},
    {'Model': 'efficientnet_v1_b1',
     'Output': ['/features/features.8/features.8.2/Mul_output_0']},
    {'Model': 'efficientnet_v2_s',
     'Output': ['/features/features.7/features.7.2/Mul_output_0']},
    {'Model': 'mnasnet0_5',
     'Output': ['/layers/layers.16/Relu_output_0']},
    {'Model': 'mnasnet1_0',
     'Output': ['/layers/layers.16/Relu_output_0']},
    {'Model': 'mobilenet_v2',
     'Output': ['/features/features.18/features.18.2/Clip_output_0']},
    {'Model': 'resnet18',
     'Output': ['/layer4/layer4.1/relu_1/Relu_output_0']},
    {'Model': 'resnet50',
     'Output': ['/layer4/layer4.2/relu_2/Relu_output_0']},
    {'Model': 'mobilenet_v3_large',
     'Output': ['/classifier/classifier.1/Mul_output_0']},
    {'Model': 'mobilenet_v3_small',
     'Output': ['/classifier/classifier.1/Mul_output_0']},
    # NOTE(review): the two OFA model names below look garbled (the
    # '[email protected]' fragment resembles e-mail-address obfuscation applied by a
    # web scraper) — confirm the actual .onnx filenames on disk.
    {'Model': 'v100_gpu64@[email protected] _finetune@25',
     'Output': ['471']},
    {'Model': 'v100_gpu64@[email protected] _finetune@25',
     'Output': ['471']},
    # vit_b_16 requires BATCHSIZE = 1!
    {'Model': 'vit_b_16',
     'Output': ['onnx::Gather_1703']},
]
78+
import os

import torch

import ppq.lib as PFL
from ppq.api import ENABLE_CUDA_KERNEL, load_onnx_graph
from ppq.core import TargetPlatform
from ppq.executor import TorchExecutor
from ppq.quantization.optim import (LayerwiseEqualizationPass,
                                    LearnedStepSizePass,
                                    ParameterQuantizePass,
                                    RuntimeCalibrationPass)

from QuantZoo.Data.Imagenet.Eval import (evaluate_ppq_module_with_imagenet,
                                         load_imagenet_from_directory)
from QuantZoo.Quantizers import MyFP8Quantizer, MyInt8Quantizer
from QuantZoo.Util import error_analyze
94+
95+
# Calibration dataloader — labels are not required for calibration, so
# require_label is disabled.
calib_loader = load_imagenet_from_directory(
    directory=CALIB_DIR,
    batchsize=BATCHSIZE,
    shuffle=False,
    require_label=False,
    num_of_workers=8)
100+
101+
# Evaluation dataloader — labels are required to measure classification
# accuracy.
test_loader = load_imagenet_from_directory(
    directory=TEST_DIR,
    batchsize=BATCHSIZE,
    shuffle=False,
    require_label=True,
    num_of_workers=8)
106+
107+
# Run the quantization benchmark over every configured model:
# load the ONNX graph, quantize it, calibrate on calib_loader, then report
# classification accuracy and per-output quantization error.
with ENABLE_CUDA_KERNEL():
    for config in CONFIGS:
        model = config['Model']
        monitoring_vars = config['Output']

        print(f'Ready to run quant benchmark on {model}')
        graph = load_onnx_graph(
            onnx_import_file=os.path.join(MODEL_DIR, model + '.onnx'))

        if model == 'vit_b_16':
            # vit_b_16 only works with batchsize 1 (see CONFIGS note).
            if BATCHSIZE == 32:
                raise Exception('To Evaluate vit_b_16, change batchsize to 1, change calibration method to minmax.')
            # Transformer graphs need MatMul+Add / LayerNorm / GELU fusion
            # before quantization.
            from ppq.IR import GraphMerger
            processor = GraphMerger(graph)
            processor.fuse_matmul_add()
            processor.fuse_layernorm()
            processor.fuse_gelu()

        quantizer = MyInt8Quantizer(
            graph=graph, sym=SYMMETRICAL, power_of_2=POWER_OF_2,
            num_of_bits=BIT_WIDTH, per_channel=PER_CHANNEL)
        # quantizer = MyFP8Quantizer(graph=graph)

        # Convert compute-heavy ops to quantable ops.
        for name, op in graph.operations.items():
            if op.type in {'Conv', 'ConvTranspose', 'MatMul', 'Gemm',
                           'PPQBiasFusedMatMul', 'LayerNormalization'}:
                quantizer.quantize_operation(name, platform=TargetPlatform.INT8)

        # Build quant pipeline.
        pipeline = PFL.Pipeline([
            # LayerwiseEqualizationPass(iteration=10),
            ParameterQuantizePass(),
            RuntimeCalibrationPass(),
            # LearnedStepSizePass(steps=500, collecting_device='cuda', block_size=5)
        ])

        # Trace operation metadata with a dummy input, then run the pipeline.
        executor = TorchExecutor(graph=graph)
        executor.tracing_operation_meta(
            torch.zeros(size=[BATCHSIZE, 3, 224, 224]).cuda())

        pipeline.optimize(
            graph=graph, dataloader=calib_loader, verbose=True,
            calib_steps=32, collate_fn=lambda x: x.to('cuda'),
            executor=executor)

        # Evaluation.
        acc = evaluate_ppq_module_with_imagenet(
            model=graph, imagenet_validation_loader=test_loader,
            batchsize=BATCHSIZE, device='cuda', verbose=False)
        # Fixed user-facing typo: 'Accurarcy' -> 'Accuracy'.
        print(f'Model Classify Accuracy = {acc: .4f}%')

        # Error analysis on the monitored output variables.
        performance = error_analyze(
            graph=graph,
            outputs=monitoring_vars,
            dataloader=test_loader,
            collate_fn=lambda x: x[0].to('cuda'),
            verbose=True
        )