Skip to content

Commit 2c8421c

Browse files
author
channings
authored
Merge pull request #186 from Channingss/support_nlp
Support ERNIE of PaddleNLP
2 parents e6ebabd + 1d4debb commit 2c8421c

File tree

7 files changed

+173
-23
lines changed

7 files changed

+173
-23
lines changed

docs/en/op_list.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
| conv2d | 1~12 |
1919
| conv2d_transpose | 1~12 |
2020
| collect_fpn_proposals | 11~12 |
21+
| cumsum | 11~12 |
2122
| deformable_conv | 11~12 |
2223
| depthwise_conv2d | 1~12 |
2324
| distribute_fpn_proposals | 11~12 |
@@ -51,9 +52,12 @@
5152
| leaky_relu | 1~12 |
5253
| less_equal| 12~ |
5354
| log | 1~12 |
55+
| lookup_table | 1~12 |
56+
| lookup_table_v2 | 1~12 |
5457
| logical_and | 1~12 |
5558
| matmul | 1~12 |
5659
| matmul_v2 | 1~12 |
60+
| mean | 1~12 |
5761
| mul | 1~12 |
5862
| muticlass_nms | 10~12 |
5963
| muticlass_nms2 | 10~12 |
@@ -80,11 +84,13 @@
8084
| softmax | 1~12 |
8185
| scale | 1~12 | opset 1~6 limited supported |
8286
| sequence_expand | 1~12 |
87+
| softmax_with_cross_entropy | 12 |
8388
| shape | 1~12 |
8489
| sigmoid | 1~12 |
8590
| slice | 1~12 |
8691
| split | 1~12 |
8792
| squeeze2 | 1~12 |
93+
| square | 7~12 |
8894
| sqrt | 1~12 |
8995
| stack | 1~12 |
9096
| stride_slice | 1~12 |

docs/zh/model_zoo.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,17 @@
4747
## 图像检测
4848
待测试
4949

50+
## 自然语言处理
51+
目前支持的模型有ERNIE系列模型,测试模型来自于PaddleNLP [2.0-beta 分支](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP)
52+
53+
| 模型 | 来源 |
54+
|-------|--------|
55+
|ERNIE-1.0|[PaddleNLP](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/docs/models.md#paddlenlpmodels) |
56+
|ERNIE-2.0|[PaddleNLP](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/docs/models.md#paddlenlpmodels) |
5057

5158
# 静态图模型
5259
## 图像分类
53-
图像分类模型支持比较完善,测试模型来自于 PaddleCls [master/](https://github.com/PaddlePaddle/PaddleClas/tree/master)
60+
图像分类模型支持比较完善,测试模型来自于 PaddleClas [master 分支](https://github.com/PaddlePaddle/PaddleClas/tree/master)
5461

5562
| 模型 | 来源 |
5663
|-------|--------|

docs/zh/op_list.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
| conv2d | 1~12 |
1919
| conv2d_transpose | 1~12 |
2020
| collect_fpn_proposals | 11~12 |
21+
| cumsum | 11~12 |
2122
| deformable_conv | 11~12 |
2223
| depthwise_conv2d | 1~12 |
2324
| distribute_fpn_proposals | 11~12 |
@@ -51,9 +52,12 @@
5152
| leaky_relu | 1~12 |
5253
| less_equal| 12~ |
5354
| log | 1~12 |
55+
| lookup_table | 1~12 |
56+
| lookup_table_v2 | 1~12 |
5457
| logical_and | 1~12 |
5558
| matmul | 1~12 |
5659
| matmul_v2 | 1~12 |
60+
| mean | 1~12 |
5761
| mul | 1~12 |
5862
| muticlass_nms | 10~12 |
5963
| muticlass_nms2 | 10~12 |
@@ -80,11 +84,13 @@
8084
| softmax | 1~12 |
8185
| scale | 1~12 | opset 1~6 limited supported |
8286
| sequence_expand | 1~12 |
87+
| softmax_with_cross_entropy | 12 |
8388
| shape | 1~12 |
8489
| sigmoid | 1~12 |
8590
| slice | 1~12 |
8691
| split | 1~12 |
8792
| squeeze2 | 1~12 |
93+
| square | 7~12 |
8894
| sqrt | 1~12 |
8995
| stack | 1~12 |
9096
| stride_slice | 1~12 |

paddle2onnx/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
from __future__ import absolute_import
1515

16-
__version__ = "0.4"
16+
__version__ = "0.5"
1717

1818
from .convert import dygraph2onnx, program2onnx
1919
from .op_mapper import register_op_mapper

paddle2onnx/op_mapper/math.py

Lines changed: 132 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import numpy as np
1818
from paddle2onnx.constant import dtypes
1919
from paddle2onnx.op_mapper import OpMapper as op_mapper
20+
from paddle2onnx.op_mapper import mapper_helper
2021

2122

2223
@op_mapper('matmul')
@@ -27,8 +28,24 @@ class MatMul():
2728
def opset_1(cls, graph, node, **kw):
2829
x = node.input('X', idx=0)
2930
y = node.input('Y', idx=0)
30-
graph.make_node('MatMul', inputs=[x, y], outputs=node.output('Out'))
31-
31+
if node.attr('transpose_X'):
32+
perm = list(range(len(node.input_shape('X', 0))))
33+
perm[-1], perm[-2] = perm[-2], perm[-1]
34+
x = graph.make_node('Transpose', inputs=[x], perm=perm)
35+
if node.attr('transpose_Y'):
36+
perm = list(range(len(node.input_shape('Y', 0))))
37+
perm[-1], perm[-2] = perm[-2], perm[-1]
38+
y = graph.make_node('Transpose', inputs=[y], perm=perm)
39+
if node.attr('alpha') == 1.0:
40+
graph.make_node('MatMul', inputs=[x, y], outputs=node.output('Out'))
41+
else:
42+
matmul = graph.make_node('MatMul', inputs=[x, y])
43+
scale = graph.make_node(
44+
'Constant',
45+
dtype=dtypes.ONNX.FLOAT,
46+
value=node.attr('alpha'))
47+
onnx_node = graph.make_node(
48+
'Mul', inputs=[matmul, scale], outputs=node.output('Out'))
3249

3350
@op_mapper('matmul_v2')
3451
class MatMul():
@@ -40,9 +57,13 @@ def opset_1(cls, graph, node, **kw):
4057
y = node.input('Y', idx=0)
4158
out = node.output('Out')
4259
if node.attr('trans_x'):
43-
x = graph.make_node('Transpose', inputs=[x])
60+
perm = list(range(len(node.input_shape('X', 0))))
61+
perm[-1], perm[-2] = perm[-2], perm[-1]
62+
x = graph.make_node('Transpose', inputs=[x], perm=perm)
4463
if node.attr('trans_y'):
45-
y = graph.make_node('Transpose', inputs=[y])
64+
perm = list(range(len(node.input_shape('Y', 0))))
65+
perm[-1], perm[-2] = perm[-2], perm[-1]
66+
y = graph.make_node('Transpose', inputs=[y], perm=perm)
4667
graph.make_node('MatMul', inputs=[x, y], outputs=out)
4768

4869

@@ -131,6 +152,30 @@ def opset_8(cls, graph, node, **kw):
131152
'Pow', inputs=[x, factor_broadcast], outputs=node.output('Out'))
132153

133154

155+
@op_mapper('square')
156+
class Square():
157+
support_opset_verision_range = (7, 12)
158+
159+
@classmethod
160+
def opset_7(cls, graph, node, **kw):
161+
x = node.input('X', 0)
162+
onnx_node = graph.make_node(
163+
'Mul', inputs=[x, x], outputs=node.output('Out'))
164+
165+
@op_mapper('cumsum')
166+
class CumSum():
167+
support_opset_version_range = (11, 12)
168+
169+
@classmethod
170+
def opset_11(cls, graph, node, **kw):
171+
172+
axis = graph.make_node('Constant', dtype=dtypes.ONNX.INT64, value=node.attr('axis'))
173+
graph.make_node(
174+
'CumSum',
175+
inputs=[node.input('X', 0), axis],
176+
outputs=node.output('Out'))
177+
178+
134179
@op_mapper('mul')
135180
class Mul():
136181
support_opset_version_range = (1, 12)
@@ -140,16 +185,24 @@ def opset_1(cls, graph, node, **kw):
140185
x = node.input('X', 0)
141186
y = node.input('Y', 0)
142187
out = node.output('Out', 0)
143-
x_shape = node.input_shape('X', 0)
144-
y_shape = node.input_shape('Y', 0)
145188
x_num_col_dims = node.attr('x_num_col_dims')
146189
y_num_col_dims = node.attr('y_num_col_dims')
147190
flatten_x = graph.make_node(
148191
'Flatten', inputs=node.input('X'), attrs={'axis': x_num_col_dims})
149192
flatten_y = graph.make_node(
150193
'Flatten', inputs=node.input('Y'), attrs={'axis': y_num_col_dims})
151-
mul_node = graph.make_node(
152-
'MatMul', inputs=[flatten_x, flatten_y], outputs=node.output('Out'))
194+
mul_node = graph.make_node('MatMul', inputs=[flatten_x, flatten_y])
195+
196+
x_shape = graph.make_node('Shape', inputs=[x])
197+
l_shape = mapper_helper.slice_helper(
198+
graph, x_shape, axes=[0], starts=[0], ends=[x_num_col_dims])
199+
y_shape = graph.make_node('Shape', inputs=[y])
200+
y_rank = len(node.input_shape('Y', 0))
201+
r_shape = mapper_helper.slice_helper(
202+
graph, y_shape, axes=[0], starts=[y_num_col_dims], ends=[y_rank])
203+
204+
out_shape = graph.make_node('Concat', inputs=[l_shape, r_shape], axis=0)
205+
graph.make_node('Reshape', [mul_node, out_shape], node.output('Out'))
153206

154207

155208
@op_mapper('affine_channel')
@@ -244,6 +297,19 @@ def opset_1(cls, graph, node, **kw):
244297
axes=[0])
245298

246299

300+
@op_mapper('mean')
301+
class Mean():
302+
support_opset_verison_range = (1, 12)
303+
304+
@classmethod
305+
def opset_1(cls, graph, node, **kw):
306+
graph.make_node(
307+
'ReduceMean',
308+
inputs=node.input('X'),
309+
outputs=node.output('Out'),
310+
keepdims=0)
311+
312+
247313
@op_mapper('arg_max')
248314
class ArgMax():
249315
support_opset_version_range = (1, 12)
@@ -282,26 +348,28 @@ def opset_7(cls, graph, node, **kw):
282348
'Identity', inputs=node.input('X'), outputs=node.output('Out'))
283349
else:
284350
scale_node = graph.make_node(
285-
'Constant', attrs={'dtype': dtypes.ONNX.FLOAT,
286-
'value': scale})
351+
'Constant',
352+
attrs={'dtype': dtypes.ONNX.FLOAT,
353+
'value': [scale]})
287354
bias_node = graph.make_node(
288-
'Constant', attrs={'dtype': dtypes.ONNX.FLOAT,
289-
'value': bias})
355+
'Constant',
356+
attrs={'dtype': dtypes.ONNX.FLOAT,
357+
'value': [bias]})
290358
cast_node = graph.make_node(
291359
'Cast', inputs=node.input('X'),
292360
attrs={'to': dtypes.ONNX.FLOAT})
293361
if node.attr('bias_after_scale'):
294-
node1 = graph.make_node('Mul', inputs=[scale_node, cast_node])
362+
node1 = graph.make_node('Mul', inputs=[cast_node, scale_node])
295363
node2 = graph.make_node(
296364
'Add',
297-
inputs=[bias_node, node1],
365+
inputs=[node1, bias_node],
298366
outputs=node.output('Out'))
299367
else:
300-
node1 = graph.make_node('Add', inputs=[bias_node, cast_node])
368+
node1 = graph.make_node('Add', inputs=[cast_node, bias_node])
301369
node2 = graph.make_node(
302370
'Mul',
303-
inputs=[scale_node, node1],
304-
outputs=[node.output('Out')])
371+
inputs=[node1, scale_node],
372+
outputs=[node.output('Out', 0)])
305373

306374

307375
@op_mapper('softmax')
@@ -333,3 +401,50 @@ def opset_1(cls, graph, node, **kw):
333401
inputs=[softmax_node],
334402
outputs=node.output('Out'),
335403
attrs={'perm': perm})
404+
405+
406+
@op_mapper('softmax_with_cross_entropy')
407+
class SoftmaxCrossEntropyLoss():
408+
support_opset_verison_range = (12, 12)
409+
410+
@classmethod
411+
def opset_12(cls, graph, node, **kw):
412+
if node.attr('soft_label'):
413+
raise Exception(
414+
"SoftmaxCrossEntropyLoss in onnx not support soft label.")
415+
416+
labels = node.input('Label', 0)
417+
scores = node.input('Logits', 0)
418+
419+
outputs = [node.output('Loss', 0)]
420+
if 'Softmax' in node.outputs:
421+
outputs.append(node.output('Softmax', 0))
422+
423+
shape = node.input_shape('Logits', 0)
424+
axis = node.attr('axis')
425+
if axis < 0:
426+
axis += len(shape)
427+
if axis == len(shape) - 1:
428+
graph.make_node(
429+
'SoftmaxCrossEntropyLoss',
430+
inputs=[scores, labels],
431+
outputs=outputs,
432+
ignore_index=node.attr('ignore_index'),
433+
reduction='mean')
434+
else:
435+
perm = [i for i in range(len(shape))]
436+
perm[-1] = axis
437+
perm[axis] = len(shape) - 1
438+
transpose_node = graph.make_node(
439+
'Transpose', inputs=node.input('X'), attrs={'perm': perm})
440+
node = graph.make_node(
441+
'SoftmaxCrossEntropyLoss',
442+
inputs=[scores, labels],
443+
outputs=outputs,
444+
ignore_index=node.attr('ignore_index'),
445+
reduction='mean')
446+
transpose_node1 = graph.make_node(
447+
'Transpose',
448+
inputs=[softmax_node],
449+
outputs=node.output('Out'),
450+
attrs={'perm': perm})

paddle2onnx/op_mapper/tensor.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,21 @@ def opset_1(cls, graph, node, **kw):
365365
})
366366

367367

368+
@op_mapper(['lookup_table_v2', 'lookup_table'])
369+
class Embedding():
370+
support_opset_verison_range = (1, 12)
371+
372+
@classmethod
373+
def opset_1(cls, graph, node, **kw):
374+
ids = node.input('Ids', 0)
375+
if node.type == 'lookup_table' and node.input_shape('Ids', 0)[-1] == 1:
376+
ids = graph.make_node(
377+
'Squeeze', inputs=node.input('Ids', 0), axes=[-1])
378+
graph.make_node(
379+
'Gather',
380+
inputs=[node.input('W', 0), ids],
381+
outputs=node.output('Out'))
382+
368383
@op_mapper('fill_constant_batch_size_like')
369384
class FillConstantBatchSizeLike():
370385
support_opset_verison_range = (9, 12)
@@ -414,14 +429,15 @@ def opset_9(cls, graph, node, **kw):
414429
input_dtype = node.input_var('X', 0).dtype
415430
if dtype is None:
416431
dtype = input_dtype
417-
dtype = dtypes.DTYPE_PADDLE_ONNX_MAP[dtype]
432+
np_dtype = dtypes.DTYPE_PADDLE_STR_MAP[dtype]
433+
onnx_dtype = dtypes.DTYPE_PADDLE_ONNX_MAP[dtype]
418434
graph.make_node(
419435
'ConstantOfShape',
420436
inputs=[shape_node],
421437
outputs=node.output('Out'),
422438
dims=[1],
423-
dtype=dtype,
424-
value=value)
439+
dtype=onnx_dtype,
440+
value=np.array(value).astype(np_dtype))
425441

426442

427443
@op_mapper('gather')

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
setuptools.setup(
2424
name="paddle2onnx",
25-
version=0.4,
25+
version=0.5,
2626
author="dltp-sz",
2727
author_email="dltp-sz@baidu.com",
2828
description="a toolkit for converting trained model of PaddlePaddle to ONNX.",

0 commit comments

Comments
 (0)