import torch
import torch.nn as nn

from tinynn.converter import TFLiteConverter
from tinynn.graph.tracer import model_tracer
from tinynn.graph.quantization.quantizer import PostQuantizer


def test_gru_model():
    class DemoModel(nn.Module):
        def __init__(self, input_size, hidden_size):
            super().__init__()
            self.gru_layer_1 = nn.GRU(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=1,
                batch_first=True,
            )
            self.gru_layer_2 = nn.GRU(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=1,
                batch_first=True,
            )
            self.hidden_size = hidden_size

        def forward(self, inputs, hidden_state):
            # Split the combined hidden state into one state per GRU layer.
            h1, h2 = torch.chunk(hidden_state, 2, -1)
            output, h1_out = self.gru_layer_1(inputs, h1)
            output, h2_out = self.gru_layer_2(output, h2)
            h_out = torch.concat([h1_out, h2_out], dim=-1)
            return output, h_out

    input_size = 20
    hidden_size = 20
    model = DemoModel(input_size, hidden_size).eval().cpu()
    inputs = torch.rand(1, 1, input_size)
    h_state = torch.zeros(1, 1, hidden_size * 2)
    dummy_inputs = (inputs, h_state)

    with model_tracer():
        config = {"asymmetric": False, "per_tensor": False, "rewrite_graph": True}
        quantizer = PostQuantizer(model, dummy_inputs, work_dir="debug", config=config)
        ptq_model = quantizer.quantize()

    # Calibrate with a few random batches before converting.
    with torch.no_grad():
        for idx in range(10):
            inputs = torch.rand(1, 1, 20)
            output, h_state = ptq_model(inputs, h_state)

    ptq_model = quantizer.convert(ptq_model)
    converter = TFLiteConverter(
        ptq_model,
        dummy_inputs,
        tflite_path="debug.tflite",
        strict_symmetric_check=True,
        quantize_target_type="int16",
        tflite_micro_rewrite=True,
        group_tensors=True,
        nchw_transpose=True,
        fuse_quant_dequant=True,
        quantize_input_output_type="int16",
    )
    converter.convert()


if __name__ == "__main__":
    test_gru_model()
When I try to run PTQ on DemoModel, I get the following error:
Traceback (most recent call last):
  File "/home/TinyNeuralNetwork/tests/test_chunk.py", line 60, in <module>
    test_gru_model()
  File "/home/TinyNeuralNetwork/tests/test_chunk.py", line 57, in test_gru_model
    converter.convert()
  File "/home/TinyNeuralNetwork/tinynn/converter/base.py", line 541, in convert
    optimizer.optimize()
  File "/home/TinyNeuralNetwork/tinynn/converter/operators/optimize.py", line 3613, in optimize
    self.quantize_input_output_type_pass()
  File "/home/TinyNeuralNetwork/tinynn/util/util.py", line 180, in wrapper
    return f(*args, **kwds)
  File "/home/TinyNeuralNetwork/tinynn/converter/operators/optimize.py", line 3099, in quantize_input_output_type_pass
    raise AssertionError(
AssertionError: Unsupported types: input_type: float32, quantize_input_type: int16
The cause of this error is that, when h1, h2 = torch.chunk(hidden_state, 2, -1) is converted, hidden_state passes through a Reshape operator before it reaches the Quantize operator, which makes the quantize_input_output pass fail. I can work around the problem by modifying my code, but I would still like to understand why an extra Reshape operator appears at this position.
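For reference, a minimal sketch of the kind of workaround I mean (the three-input forward signature below is my own restructuring, not anything required by TinyNeuralNetwork): split the hidden state outside the model, so that torch.chunk no longer sits directly on a graph input.

import torch.nn as nn

# Hypothetical workaround sketch: h1 and h2 become separate model inputs,
# so no chunk (and hence no inserted Reshape) precedes the Quantize op.
class DemoModelSplit(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.gru_layer_1 = nn.GRU(input_size, hidden_size, num_layers=1, batch_first=True)
        self.gru_layer_2 = nn.GRU(input_size, hidden_size, num_layers=1, batch_first=True)

    def forward(self, inputs, h1, h2):
        output, h1_out = self.gru_layer_1(inputs, h1)
        output, h2_out = self.gru_layer_2(output, h2)
        # Return the two hidden states separately instead of concatenating.
        return output, h1_out, h2_out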
Note 1: With quantize_input_output_type=None the model exports normally, and the operator order for the input corresponding to hidden_state can be seen to be: input -> Reshape -> Quantize -> Reshape -> Split.
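To double-check that operator order, the exported file can be inspected with TensorFlow's model analyzer (tf.lite.experimental.Analyzer is available in recent TensorFlow releases; "debug.tflite" is the path used in the script above):

import tensorflow as tf

# Prints the operator graph of the exported model; the
# input -> Reshape -> Quantize -> Reshape -> Split chain shows up here.
tf.lite.experimental.Analyzer.analyze(model_path="debug.tflite")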
Thanks~