quantize_input_output failed #394

@HuixiangH

Description

import torch
import torch.nn as nn


from tinynn.converter import TFLiteConverter
from tinynn.graph.tracer import model_tracer
from tinynn.graph.quantization.quantizer import PostQuantizer


def test_gru_model():
    class DemoModel(nn.Module):
        def __init__(self, input_size, hidden_size):
            super().__init__()
            self.gru_layer_1 = nn.GRU(input_size=input_size,
                                      hidden_size=hidden_size,
                                      num_layers=1, batch_first=True)
            self.gru_layer_2 = nn.GRU(input_size=input_size,
                                      hidden_size=hidden_size,
                                      num_layers=1, batch_first=True)
            self.hidden_size = hidden_size

        def forward(self, inputs, hidden_state):
            h1, h2 = torch.chunk(hidden_state, 2, -1)
            output, h1_out = self.gru_layer_1(inputs, h1)
            output, h2_out = self.gru_layer_2(output, h2)
            h_out = torch.concat([h1_out, h2_out], dim=-1)
            return output, h_out
        
    input_size = 20
    hidden_size = 20
    model = DemoModel(input_size, hidden_size).eval().cpu()

    inputs = torch.rand(1, 1, input_size)
    h_state = torch.zeros(1, 1, hidden_size*2)
    dummy_inputs = (inputs, h_state)
    with model_tracer():
        config = {"asymmetric": False, "per_tensor": False, "rewrite_graph": True}
        quantizer = PostQuantizer(model, dummy_inputs, work_dir="debug", config=config)
        ptq_model = quantizer.quantize()

        with torch.no_grad():
            for idx in range(10):
                inputs = torch.rand(1, 1, 20)
                output, h_state = ptq_model(inputs, h_state)
        ptq_model = quantizer.convert(ptq_model)
        converter = TFLiteConverter(ptq_model,
                                    dummy_inputs,
                                    tflite_path="debug.tflite",
                                    strict_symmetric_check=True,
                                    quantize_target_type="int16",
                                    tflite_micro_rewrite=True,
                                    group_tensors=True,
                                    nchw_transpose=True,
                                    fuse_quant_dequant=True,
                                    quantize_input_output_type="int16"
                                    )
        converter.convert()

if __name__ == "__main__":
    test_gru_model()

When I try to run PTQ on DemoModel, I get the following error:

Traceback (most recent call last):
  File "/home/TinyNeuralNetwork/tests/test_chunk.py", line 60, in <module>
    test_gru_model()
  File "/home/TinyNeuralNetwork/tests/test_chunk.py", line 57, in test_gru_model
    converter.convert()
  File "/home/TinyNeuralNetwork/tinynn/converter/base.py", line 541, in convert
    optimizer.optimize()
  File "/home/TinyNeuralNetwork/tinynn/converter/operators/optimize.py", line 3613, in optimize
    self.quantize_input_output_type_pass()
  File "/home/TinyNeuralNetwork/tinynn/util/util.py", line 180, in wrapper
    return f(*args, **kwds)
  File "/home/TinyNeuralNetwork/tinynn/converter/operators/optimize.py", line 3099, in quantize_input_output_type_pass
    raise AssertionError(
AssertionError: Unsupported types: input_type: float32, quantize_input_type: int16

The error happens because, when `h1, h2 = torch.chunk(hidden_state, 2, -1)` is converted, a Reshape operator is inserted on hidden_state before the Quantize operator, which then makes the quantize_input_output pass fail. I can work around the problem by modifying my model code, but I would still like to understand why an extra Reshape operator shows up at this position.
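For reference, this is a minimal sketch of the kind of code-level workaround I mean (the class name DemoModelSplit is just for illustration, not the actual fix): feeding the two hidden states as separate graph inputs removes the chunk, so no Reshape is needed on the quantized input.

# Hypothetical workaround sketch, not the actual fix: pass h1/h2 as separate
# graph inputs so the hidden state never needs a chunk (and hence no extra
# Reshape) before the Quantize op.
import torch
import torch.nn as nn

class DemoModelSplit(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.gru_layer_1 = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                                  num_layers=1, batch_first=True)
        self.gru_layer_2 = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                                  num_layers=1, batch_first=True)

    def forward(self, inputs, h1, h2):
        # each hidden state is already shaped (num_layers, batch, hidden_size)
        output, h1_out = self.gru_layer_1(inputs, h1)
        output, h2_out = self.gru_layer_2(output, h2)
        return output, h1_out, h2_out

# dummy inputs would then be (inputs, h1, h2),
# with h1 = h2 = torch.zeros(1, 1, hidden_size)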

Note 1: with quantize_input_output_type=None the model exports fine, and the operator order for the input corresponding to hidden_state can be seen to be: input -> Reshape -> Quantize -> Reshape -> Split.
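For anyone reproducing this, the operator order in the exported model can be listed with TensorFlow's TFLite analyzer (assuming TensorFlow 2.7+ is installed):

import tensorflow as tf

# Print the operator list of the exported model; the hidden_state input shows
# up as input -> Reshape -> Quantize -> Reshape -> Split.
tf.lite.experimental.Analyzer.analyze(model_path="debug.tflite")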
Thanks!
