# Export the parameters of the quantized model so that inference can be implemented on an FPGA.
import os.path

import numpy as np
import torch

from model import cifar10_net


def quant_export(network_class_quant, quant_model_dict_path: str,
                 quant_dict={}, scale_dict={}, fix_point_dict={}, zero_point_dict={},
                 txt_path_dir: str = "txt", coe_path_dir: str = "coe"):
    # network_class_quant: floating-point network instance with quantization stubs inserted
    # quant_model_dict_path: path to the state dict of the quantized model
    # The dicts below are later used by the FPGA reference simulation (nn_forward_verilog)
    # and by the input-image quantization script (img_quant_export):
    # quant_dict: quantization parameters (scale / zero_point) of each layer
    # scale_dict: scale of each layer's input activation
    # zero_point_dict: zero_point of each layer's input activation
    # fix_point_dict: fixed-point representation of the float multiplier M = (scale_in * scale_weight / scale_out)
    # txt_path_dir: name of the folder for the exported txt files
    # coe_path_dir: name of the folder for the exported coe files
    # Create the output folders
    if not os.path.exists("./{}".format(txt_path_dir)):
        os.makedirs("./{}".format(txt_path_dir))
    if not os.path.exists("./{}".format(coe_path_dir)):
        os.makedirs("./{}".format(coe_path_dir))
    # Load the quantized model, following the same prepare/convert steps as PyTorch static
    # quantization so that the module structure matches the keys of the saved state dict.
    model_fp32 = network_class_quant
    state_dict = torch.load(quant_model_dict_path)
    model_fp32.qconfig = torch.quantization.get_default_qconfig('x86')
    load_model_prepared = torch.quantization.prepare(model_fp32)
    model_int8 = torch.quantization.convert(load_model_prepared)
    model_int8.load_state_dict(state_dict)
    bias_dict = {}  # float biases of each layer; they are quantized later and saved as txt
    # Export the parameters of the quantized model
    for key in state_dict:
        # print(key)
        if "bias" in key:
            # Store the bias for now; it is quantized later using the relevant scales.
            bias_dict[key] = state_dict[key].detach()
        if "weight" in key:  # export the quantized weights to a single-column txt file
            # Save the quantization info (scale, zero_point) of the weight
            quant_dict[key + ".scale"] = state_dict[key].q_scale()
            quant_dict[key + ".zero_point"] = state_dict[key].q_zero_point()
            weight_int = state_dict[key].int_repr()
            weight_int = torch.reshape(weight_int, (-1, 1))
            # Cast to uint8 because np.savetxt only writes unsigned hex; the underlying bits are unchanged.
            weight_int_np = weight_int.numpy().astype("uint8")
            np.savetxt("./{}/{}_int8.txt".format(txt_path_dir, key), weight_int_np, fmt="%02x")
            # Save as a coe file
            coe_path = "./{}/{}_int8.coe".format(coe_path_dir, key)
            write_np2coe(path=coe_path, data_np=weight_int_np, radix=16, fmt="{:02x}")
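            # Illustrative example (hypothetical value): an int8 weight of -3 is written as its
            # two's-complement byte 0xfd, so the uint8 view above changes only how numpy formats
            # the value, not the bits the FPGA will read.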
        # Collect the output zero_point & scale of each layer into the quantization dict.
        # Note that these are the quantization parameters of the layer's output, not of its weights.
        if ("scale" in key) or ("zero_point" in key):
            # Insert ".out" into the key so these entries are easy to identify later.
            key_replace = key.replace(".scale", ".out.scale").replace(".zero_point", ".out.zero_point")
            quant_dict[key_replace] = state_dict[key].item()
        # Some layers (linear layers) store their parameters as a packed tuple.
        if ("_packed_params" in key) and ("dtype" not in key):
            w, b = state_dict[key]
            bias_key = key.replace("_packed_params._packed_params", "bias")
            bias_dict[bias_key] = b.detach()
            # Save the scale and zero_point of the weight w
            quant_dict[key.replace("_packed_params._packed_params", "weight.scale")] = w.q_scale()
            quant_dict[key.replace("_packed_params._packed_params", "weight.zero_point")] = w.q_zero_point()
            # Export the weight as a single-column txt file
            weight_int = torch.reshape(w.int_repr(), (-1, 1))
            weight_int_np = weight_int.numpy().astype("uint8")
            # print(weight_int_np)
            txt_path = "./{}/".format(txt_path_dir) + key.replace("_packed_params._packed_params", "") + \
                       "weight_int8.txt"
            np.savetxt(txt_path, weight_int_np, fmt="%02x")
            # Save as a coe file
            coe_path = "./{}/".format(coe_path_dir) + key.replace("_packed_params._packed_params", "") + \
                       "weight_int8.coe"
            write_np2coe(path=coe_path, data_np=weight_int_np, radix=16, fmt="{:02x}")
    # Quantizing a bias requires the product of two input scales (s1 * s2).
    # To make the dict keys easier to use, entries originally keyed as previous_layer.out.scale are
    # re-keyed as next_layer.in.scale, so that scale_dict[next_layer.in.scale] and
    # quant_dict[next_layer.weight.scale] can both be looked up by the next layer's name;
    # their product is the scale used to quantize that layer's bias.
    # Ordered list of the network ops whose scales are needed
    nn_op_order = ["quant", "conv1", "conv2", "conv3", "linear1", "linear2"]
    # Build the renamed scale dict.
    # Effectively the following keys are renamed:
    # quant.out.scale --> conv1.in.scale
    # conv1.out.scale --> conv2.in.scale
    # -----------------------------etc----
    # previous_layer.out.scale --> next_layer.in.scale
    # The same renaming applies to zero_point.
    i = 1
    for op in nn_op_order:
        if op != nn_op_order[-1]:
            scale_dict[nn_op_order[i] + ".in.scale"] = quant_dict[op + ".out.scale"]
            zero_point_dict[nn_op_order[i] + ".in.zero_point"] = quant_dict[op + ".out.zero_point"]
            i = i + 1
    # Quantize each float32 bias using the scale dicts and save it as txt/coe for the FPGA.
    for key in bias_dict:
        b = bias_dict[key]
        s1 = scale_dict[key.replace(".bias", ".in.scale")]  # scale of this layer's input (previous layer's output)
        s2 = quant_dict[key.replace(".bias", ".weight.scale")]  # scale of this op's own weights
        # Quantize
        b_q = torch.quantize_per_tensor(b, scale=s1 * s2, zero_point=0, dtype=torch.qint32)
        b_q_int = b_q.int_repr().numpy().astype("uint32")
        # Save as txt
        np.savetxt("./{}/".format(txt_path_dir) + key + "_int32.txt", b_q_int, fmt="%08x")
        # Save as coe
        coe_path = "./{}/".format(coe_path_dir) + key + "_int32.coe"
        write_np2coe(path=coe_path, data_np=b_q_int, radix=16, fmt="{:08x}")
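        # Illustrative example (hypothetical values): a float bias of 0.05 with s1 * s2 = 0.001
        # quantizes to round(0.05 / 0.001) = 50, which is written to the txt/coe files as the
        # 32-bit hex word 00000032.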
    # Using the scale dicts, quantize M = [s1 (in.scale) * s2 (weight.scale)] / s3 (out.scale)
    # to an n-bit fixed-point value and store it in fix_point_dict.
    n = 16
    for key in scale_dict:
        # print(key)
        s1 = scale_dict[key]
        s2 = quant_dict[key.replace("in.scale", "weight.scale")]
        s3 = quant_dict[key.replace("in.scale", "out.scale")]
        M = s1 * s2 / s3
        M_int = int(M * 2 ** n)  # value after fixed-point quantization
        fix_point_dict[key.replace(".in", ".fix")] = M_int
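    # Illustrative example (hypothetical values): with s1 = 0.02, s2 = 0.005 and s3 = 0.25,
    # M = 0.0004 and M_int = int(0.0004 * 2**16) = 26. On the FPGA the float multiply by M can
    # then be approximated by (accumulator * M_int) >> 16; that use of fix_point_dict is an
    # assumption based on the fixed-point scheme above (see nn_forward_verilog for the actual use).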


# Print a dict entry by entry
def view_dict(d):
    for key in d:
        print("{}: {}".format(key, d[key]))


def write_np2coe(path: str, data_np, radix: int = 16, fmt: str = "{:02x}"):
    # Convert numpy data into a Vivado ROM initialization coe file
    # path: output file path
    # radix: radix of the stored values
    # fmt: format specifier for each value
    with open(path, "w") as f:
        # coe file header
        coe_title1 = "MEMORY_INITIALIZATION_RADIX = {};".format(radix)  # radix
        coe_title2 = "MEMORY_INITIALIZATION_VECTOR = "
        # Write the header
        f.write(coe_title1 + "\n")
        f.write(coe_title2 + "\n")
        # Write every value but the last, each followed by ","
        for i in range(len(data_np) - 1):
            data = int(data_np[i])
            f.write(("{},\n".format(fmt)).format(data))
        # Write the last value, followed by ";"
        data = int(data_np[-1])
        f.write(("{};".format(fmt)).format(data))
if __name__ == "__main__":
quant_dict_path = "./model_pth/model_int8.pth"
model_fp32 = cifar10_net(is_quant=True)
scale_dict = {}
zero_point_dict = {}
quant_dict = {}
fix_point_dict = {}
quant_export(network_class_quant=model_fp32, quant_model_dict_path=quant_dict_path,
quant_dict=quant_dict, scale_dict=scale_dict, zero_point_dict=zero_point_dict,
fix_point_dict=fix_point_dict)
# 查看各层量化参数
print("_________________________quant_____________________________")
view_dict(quant_dict)
# 查看scale字典数据
print("_________________________scale_____________________________")
view_dict(scale_dict)
# 查看fix_point字典数据
print("_________________________fix_____________________________")
view_dict(fix_point_dict)
# zero_point
print("__________________________zero___________________________")
view_dict(zero_point_dict)