Skip to content

Квантизация PaddlePaddle #575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements_frameworks.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dglgo==0.0.2

tflite

paddleslim==2.6.0
paddlepaddle==2.6.0
--extra-index-url https://mirror.baidu.com/pypi/simple

Expand Down
28 changes: 28 additions & 0 deletions src/configs/paddle_quantization_config_template.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="utf-8"?>
<QuantizationConfigs>
<Config>
<Model>
<Name></Name>
<PathPrefix></PathPrefix>
<ModelDir></ModelDir>
<ModelFileName></ModelFileName>
<ParamsFileName></ParamsFileName>
</Model>
<Dataset>
<Name></Name>
<Path></Path>
<Mean></Mean>
<Std></Std>
<ChannelSwap></ChannelSwap>
<BatchSize></BatchSize>
<BatchNum></BatchNum>
<ResizeResolution></ResizeResolution>
</Dataset>
<QuantizationParameters>
<InputShape></InputShape>
<InputName></InputName>
<SaveDir></SaveDir>
<Algorithm></Algorithm>
</QuantizationParameters>
</Config>
</QuantizationConfigs>
43 changes: 43 additions & 0 deletions src/quantization/paddlepaddle/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# PaddlePaddle quantization script

Script name:

```bash
quantization_paddlepaddle.py
```

Required arguments:

- `-c / --config` is the path to an XML file that describes
the quantization process. A template of the configuration file
is located [here][config_path].

Description of parameters:

`Model` contains information about the model to be quantized:
- `Name` is a name of the model.
- `PathPrefix` is a path to the model files without the extensions (.pdmodel, .pdiparams).
- `ModelDir` is a directory with the model.
- `ModelFileName` is a file name of the model description.
- `ParamsFileName` is a file name of the model parameters.

`Dataset` contains information about the dataset used for model calibration:
- `Name` is a dataset name.
- `Path` is a path to the directory that contains input data.
- `Mean` is a mean value for preprocessing data.
- `Std` is a scale value for preprocessing data.
- `ChannelSwap` is the axis order used to transpose the loaded image (for example, to reorder its channels). For RGB - 2, 1, 0. For BGR - 0, 1, 2. If the value is empty, 2, 0, 1 is used.
- `ResizeResolution` is an image size for preprocessing data. Example: 224, 224.
- `BatchSize` is a batch size.
- `BatchNum` is the total number of batches used for calibration.

`QuantizationParameters` contains information about the model input layer and the quantization settings:
- `InputShape` is a shape of the model's input layer.
- `InputName` is a name of the model's input layer.
- `SaveDir` is a directory for the quantized model to be saved.
- `Algorithm` specifies the method used to calculate the quantization scale factor.
Available: 'KL', 'hist', 'mse', 'avg', 'abs_max'. Default: 'hist'.


<!-- LINKS -->
[config_path]: ../../configs/paddle_quantization_config_template.xml
Empty file.
134 changes: 134 additions & 0 deletions src/quantization/paddlepaddle/parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import sys
from pathlib import Path
import random
import numpy as np
import paddle
from paddle.io import Dataset
import ast
from paddle.io import DataLoader
from paddleslim.quant import quant_post_static
import importlib
sys.path.append(str(Path(__file__).resolve().parents[1]))
from utils import ArgumentsParser # noqa: E402


class PaddleDatasetReader(Dataset):
    """Map-style dataset that loads and preprocesses images from a directory.

    Every entry found directly inside ``args['Path']`` is treated as an image
    file. The list of entries is shuffled once at construction time.

    Args:
        args: Mapping with the keys 'Path', 'ResizeResolution', 'Mean', 'Std',
            'ChannelSwap', 'BatchSize' and 'BatchNum'. 'Mean', 'Std' and
            'ChannelSwap' may be None, in which case defaults are used.
        log: Logger-like object; only its ``info`` method is used here.
    """

    def __init__(self, args, log):
        super(PaddleDatasetReader, self).__init__()
        self.log = log
        self.log.info('Parsing dataset arguments.')
        # cv2 is resolved at construction time rather than at module import.
        self.cv2 = importlib.import_module('cv2')
        self.data_dir = args['Path']

        self.resize_size = ast.literal_eval(args['ResizeResolution'])
        self.mean = self._parse_channel_values(args['Mean'], [0., 0., 0.])
        self.std = self._parse_channel_values(args['Std'], [1., 1., 1.])
        # The value is used as the axis order for ndarray.transpose in
        # process_image, not as a plain channel index list.
        if args['ChannelSwap'] is not None:
            self.channel_swap = ast.literal_eval(args['ChannelSwap'])
        else:
            self.channel_swap = [2, 0, 1]
        self.batch_size = int(args['BatchSize'])
        self.batch_num = int(args['BatchNum'])
        self.dataset = list(Path(self.data_dir).glob('*'))
        random.shuffle(self.dataset)
        self.dataset_iter = iter(self.dataset)

    @staticmethod
    def _parse_channel_values(raw, default):
        """Return three per-channel values as an array of shape (3, 1, 1).

        ``raw`` is a Python-literal string (parsed into float32) or None, in
        which case ``default`` is used unchanged.
        """
        if raw is None:
            values = default
        else:
            values = np.asarray(ast.literal_eval(raw), dtype=np.float32)
        return np.array(values).reshape((3, 1, 1))

    def __getitem__(self, index):
        """Return the preprocessed image stored at position ``index``."""
        image_path = str(self.dataset[index].absolute())
        return self.process_image(image_path)

    def __len__(self):
        """Return the number of entries found in the dataset directory."""
        return len(self.dataset)

    def process_image(self, image_path):
        """Read, resize, transpose and normalize a single image.

        Args:
            image_path: Path of the image file, as a string.

        Returns:
            A float32 array scaled by 1/255 with ``mean`` subtracted and
            divided by ``std``, or None when the image could not be read.
        """
        img = self.cv2.imread(image_path)
        # cv2.imread signals an unreadable file by returning None instead of
        # raising, so None must be checked before touching ``img.size``.
        if img is None or img.size == 0:
            # NOTE(review): assumes ``log`` follows the stdlib logging API,
            # where extra positional arguments fill %-placeholders.
            self.log.info('failed to read: %s', image_path)
            return None
        img = self.cv2.resize(img, self.resize_size)

        img = img.astype('float32').transpose(tuple(self.channel_swap)) / 255
        img -= self.mean
        img /= self.std

        return img


class PaddleQuantizationProcess:
    """Runs post-training static quantization for one model/dataset pair.

    Args:
        log: Logger-like object kept on the instance.
        model_reader: Object exposing ``model_dir``, ``model_filename`` and
            ``params_filename``.
        dataset: Dataset exposing ``batch_size`` and ``batch_num``.
        quant_params: Object exposing ``image``, ``save_dir`` and ``algo``.
    """

    def __init__(self, log, model_reader, dataset, quant_params):
        self.log, self.model_reader = log, model_reader
        self.dataset, self.quant_params = dataset, quant_params

    def transform_fn(self):
        """Yield every dataset sample as a one-element list of float32 data."""
        yield from ([sample.astype(np.float32)] for sample in self.dataset)

    def quantization_paddle(self):
        """Build a CPU data loader and hand it to ``quant_post_static``."""
        cpu = paddle.CPUPlace()
        executor = paddle.static.Executor(cpu)

        loader_options = {
            'places': cpu,
            'feed_list': [self.quant_params.image],
            'drop_last': False,
            'return_list': False,
            'batch_size': self.dataset.batch_size,
            'shuffle': False,
        }
        calibration_loader = DataLoader(self.dataset, **loader_options)

        # Everything below is forwarded to PaddleSlim unchanged; the fixed
        # values (rounding, histogram percentile, flags) are hard-coded.
        ptq_options = {
            'executor': executor,
            'model_dir': self.model_reader.model_dir,
            'quantize_model_path': self.quant_params.save_dir,
            'data_loader': calibration_loader,
            'model_filename': self.model_reader.model_filename,
            'params_filename': self.model_reader.params_filename,
            'batch_size': self.dataset.batch_size,
            'batch_nums': self.dataset.batch_num,
            'algo': self.quant_params.algo,
            'round_type': 'round',
            'hist_percent': 0.9999,
            'is_full_quantize': False,
            'bias_correction': False,
            'onnx_format': False,
        }
        quant_post_static(**ptq_options)


class PaddleModelReader(ArgumentsParser):
    """Holds the 'Model' section values of a quantization configuration."""

    # Instance attribute name -> key read from ``self.args``.
    _ATTRIBUTE_KEYS = (
        ('model_name', 'Name'),
        ('path_prefix', 'PathPrefix'),
        ('model_dir', 'ModelDir'),
        ('model_filename', 'ModelFileName'),
        ('params_filename', 'ParamsFileName'),
    )

    def __init__(self, log):
        super(PaddleModelReader, self).__init__(log)

    def _get_arguments(self):
        """Copy the model-related entries of ``self.args`` onto the instance."""
        self._log.info('Parsing model arguments.')
        for attribute, key in self._ATTRIBUTE_KEYS:
            setattr(self, attribute, self.args[key])

    def dict_for_iter_log(self):
        """Return the model name and path prefix under their log labels."""
        summary = {}
        summary['Name'] = self.model_name
        summary['Model path prefix'] = self.path_prefix
        return summary


class PaddleQuantParamReader(ArgumentsParser):
    """Holds the 'QuantizationParameters' section of a quantization config.

    After ``_get_arguments`` runs, the instance exposes ``input_shape``,
    ``input_name``, ``image`` (the static input placeholder), ``save_dir``
    and ``algo``.
    """

    # Algorithm used when the configuration leaves 'Algorithm' empty.
    DEFAULT_ALGORITHM = 'hist'

    def __init__(self, log):
        super().__init__(log)

    def dict_for_iter_log(self):
        """Return the parsed quantization parameters under their config keys."""
        return {
            'InputShape': self.input_shape,
            'InputName': self.input_name,
            'SaveDir': self.save_dir,
            'Algorithm': self.algo,
        }

    def _get_arguments(self):
        """Parse ``self.args`` and create the static input placeholder."""
        # literal_eval yields a tuple for '3, 224, 224' and a list for
        # '[3, 224, 224]'; normalising to a list keeps the concatenation with
        # the leading batch dimension valid for both spellings.
        self.input_shape = list(ast.literal_eval(self.args['InputShape']))
        self.input_name = self.args['InputName']
        # The leading None is the variable batch dimension.
        self.image = paddle.static.data(name=self.input_name,
                                        shape=[None] + self.input_shape,
                                        dtype='float32')
        self.save_dir = self.args['SaveDir']
        # NOTE(review): assumes an empty <Algorithm> tag arrives as None or ''
        # - confirm against ArgumentsParser.
        self.algo = self.args['Algorithm'] or self.DEFAULT_ALGORITHM
55 changes: 55 additions & 0 deletions src/quantization/paddlepaddle/quantization_paddlepaddle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import paddle
import argparse
import sys
import traceback
from pathlib import Path
from parameters import PaddleModelReader, PaddleDatasetReader, PaddleQuantizationProcess, PaddleQuantParamReader
sys.path.append(str(Path(__file__).resolve().parents[3]))
from src.utils.logger_conf import configure_logger # noqa: E402
from src.quantization.utils import ConfigParser # noqa: E402


# Module-level logger used by main() and passed to the reader/process objects.
log = configure_logger()


def cli_argument_parser():
    """Parse the command line and return the resulting namespace.

    The only option is the required ``-c`` / ``--config``, stored as
    ``config``.
    """
    config_option = {
        'help': 'Path to the configuration file in the xml-format.',
        'type': str,
        'required': True,
        'dest': 'config',
    }
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-c', '--config', **config_option)
    return arg_parser.parse_args()


def main():
    """Quantize every model described in the configuration file.

    Each configuration entry is processed independently: a failure is logged
    with its traceback and counted, and the remaining entries still run. The
    process exits with status 1 if any entry failed or if the configuration
    itself could not be processed.
    """
    args = cli_argument_parser()
    try:
        log.info(f'Parsing the configuration file {args.config}')
        config_parser = ConfigParser(args.config)
        paddle.enable_static()
        entries = config_parser.parse()
        failures = 0
        # The same reader objects are reused; each entry refills them.
        quant_params = PaddleQuantParamReader(log)
        model_reader = PaddleModelReader(log)
        for entry in entries:
            try:
                data_reader = PaddleDatasetReader(entry[1]['Dataset'], log)
                model_reader.add_arguments(entry[0]['Model'])
                quant_params.add_arguments(entry[2]['QuantizationParameters'])
                PaddleQuantizationProcess(log, model_reader, data_reader,
                                          quant_params).quantization_paddle()
            except Exception:
                log.error(traceback.format_exc())
                failures += 1
        if failures != 0:
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            sys.exit(1)
    except Exception:
        log.error(traceback.format_exc())
        sys.exit(1)


# Run the tool only when executed as a script. main() has no return statement,
# so `main() or 0` yields exit status 0 unless main() already called sys.exit(1).
if __name__ == '__main__':
    sys.exit(main() or 0)
1 change: 1 addition & 0 deletions tests/smoke_test/benchmark_smoke/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def download_dgl_models(output_dir: Path = OUTPUT_DIR):

def download_resnet50_paddle(output_dir: Path = OUTPUT_DIR):
resnet_dir = Path(output_dir, 'resnet50_paddle')
print(resnet_dir)
resnet_paddle_link = ('https://raw.githubusercontent.com/itlab-vision/itlab-vision-dl-benchmark-models/main/'
'paddlepaddle_models/resnet50/resnet50/')
download_file(resnet_paddle_link + '/inference.pdiparams', resnet_dir, 'resnet50.pdiparams')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="utf-8"?>
<QuantizationConfigs>
<Config>
<Model>
<Name>resnet50-paddle</Name>
<PathPrefix>../models_dir/resnet50_paddle/inference</PathPrefix>
<ModelDir>../models_dir/resnet50_paddle</ModelDir>
<ModelFileName>resnet50.pdmodel</ModelFileName>
<ParamsFileName>resnet50.pdiparams</ParamsFileName>
</Model>
<Dataset>
<Name>test</Name>
<Path>../test_images/classification_images</Path>
<Mean>[123.675, 116.28, 103.53]</Mean>
<Std>[58.395, 57.12, 57.375]</Std>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Не хватает параметра channel_swap

<ChannelSwap></ChannelSwap>
<BatchSize>1</BatchSize>
<BatchNum>10</BatchNum>
<ResizeResolution>[224, 224]</ResizeResolution>
</Dataset>
<QuantizationParameters>
<InputShape>[3, 224, 224]</InputShape>
<InputName>inputs</InputName>
<SaveDir>res_dir</SaveDir>
<Algorithm>avg</Algorithm>
</QuantizationParameters>
</Config>
</QuantizationConfigs>
2 changes: 2 additions & 0 deletions tests/smoke_test/quantization_smoke/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from tests.smoke_test.utils import execute_process
from tests.smoke_test.conftest import (SCRIPT_DIR, OUTPUT_DIR, log,
download_models, convert_models)
from tests.smoke_test.benchmark_smoke.conftest import download_resnet50_paddle

QUANTIZATION_CONFIG_DIR_PATH = Path(SCRIPT_DIR, 'configs', 'quantization_models')
TVM_CONVERTER = Path.joinpath(SCRIPT_DIR.parents[1], 'src/model_converters/tvm_converter/tvm_converter.py')
Expand Down Expand Up @@ -43,6 +44,7 @@ def prepare_dl_models(request, overrided_models):
models_per_mark = DL_MODELS
enabled_models = overrided_models if overrided_models else models_per_mark

download_resnet50_paddle()
download_models(models_list=enabled_models)
convert_models(models_list=enabled_models)
convert_models_to_tvm()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
QUANTIZATION_TFLITE = Path.joinpath(SCRIPT_DIR.parents[1], 'src/quantization/tflite/quantization_tflite.py')
QUANTIZATION_TVM = Path.joinpath(SCRIPT_DIR.parents[1], 'src/quantization/tvm/quantization_tvm.py')
QUANTIZATION_NNCF = Path.joinpath(SCRIPT_DIR.parents[1], 'src/quantization/nncf/quantization_nncf.py')
QUANTIZATION_PADDLE = Path.joinpath(SCRIPT_DIR.parents[1],
'src/quantization/paddlepaddle/quantization_paddlepaddle.py')

TVM_CONVERTER = Path.joinpath(SCRIPT_DIR.parents[1], 'src/model_converters/tvm_converter/tvm_converter.py')

Expand All @@ -17,6 +19,8 @@ def test_smoke_dl_models(test_configuration):
command_line = (f'python3 {QUANTIZATION_TFLITE} -c {test_configuration.config_path}')
elif test_configuration.framework == 'TVM':
command_line = (f'python3 {QUANTIZATION_TVM} -c {test_configuration.config_path}')
elif test_configuration.framework == 'PADDLEPADDLE':
command_line = (f'python3 {QUANTIZATION_PADDLE} -c {test_configuration.config_path}')
else:
raise Exception(f'Unsupported framework: {test_configuration.framework}')

Expand Down
Loading