import tensorflow as tf
from deepctr.layers.activation import activation_layer
from deepctr.layers.utils import reduce_max, reduce_mean, reduce_sum, concat_func, div, softmax
-from tensorflow.python.keras.initializers import RandomNormal, Zeros, glorot_normal
+from tensorflow.python.keras.initializers import RandomNormal, Zeros, TruncatedNormal
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras.regularizers import l2

@@ -103,19 +103,19 @@ def call(self, inputs, training=None, **kwargs):
        weight = tf.pow(weight, self.pow_p)  # [x,k_max,1]

        if len(inputs) == 3:
-            k_user = tf.cast(tf.maximum(
-                1.,
-                tf.minimum(
-                    tf.cast(self.k_max, dtype="float32"),  # k_max
-                    tf.log1p(tf.cast(inputs[2], dtype="float32")) / tf.log(2.)  # hist_len
-                )
-            ), dtype="int64")
+            k_user = inputs[2]
            seq_mask = tf.transpose(tf.sequence_mask(k_user, self.k_max), [0, 2, 1])
            padding = tf.ones_like(seq_mask, dtype=tf.float32) * (-2 ** 32 + 1)  # [x,k_max,1]
            weight = tf.where(seq_mask, weight, padding)

-        weight = softmax(weight, dim=1, name="weight")
-        output = reduce_sum(keys * weight, axis=1)
+        if self.pow_p >= 100:
+            idx = tf.stack(
+                [tf.range(tf.shape(keys)[0]), tf.squeeze(tf.argmax(weight, axis=1, output_type=tf.int32), axis=1)],
+                axis=1)
+            output = tf.gather_nd(keys, idx)
+        else:
+            weight = softmax(weight, dim=1, name="weight")
+            output = tf.reduce_sum(keys * weight, axis=1)

        return output

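Note (not part of the patch): the new branch turns the layer into hard attention when pow_p is 100 or more, keeping only the highest-scoring interest capsule per sample instead of a softmax-weighted sum. A minimal sketch of that selection, assuming TF2 eager execution and illustrative toy tensors:

import tensorflow as tf

keys = tf.constant([[[1., 1.], [2., 2.], [3., 3.]]])   # [batch=1, k_max=3, dim=2]
weight = tf.constant([[[0.1], [0.7], [0.2]]])          # [batch=1, k_max=3, 1]
idx = tf.stack([tf.range(tf.shape(keys)[0]),
                tf.squeeze(tf.argmax(weight, axis=1, output_type=tf.int32), axis=1)],
               axis=1)
print(tf.gather_nd(keys, idx))                         # [[2., 2.]] -- the capsule with the largest weight
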
@@ -172,32 +172,59 @@ def __init__(self, input_units, out_units, max_len, k_max, iteration_times=3,
        super(CapsuleLayer, self).__init__(**kwargs)

    def build(self, input_shape):
-        self.routing_logits = self.add_weight(shape=[1, self.k_max, self.max_len],
-                                              initializer=RandomNormal(stddev=self.init_std),
-                                              trainable=False, name="B", dtype=tf.float32)
        self.bilinear_mapping_matrix = self.add_weight(shape=[self.input_units, self.out_units],
-                                                       initializer=RandomNormal(stddev=self.init_std),
                                                       name="S", dtype=tf.float32)
        super(CapsuleLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
-        behavior_embddings, seq_len = inputs
-        batch_size = tf.shape(behavior_embddings)[0]
-        seq_len_tile = tf.tile(seq_len, [1, self.k_max])
+
+        behavior_embedding = inputs[0]
+        seq_len = inputs[1]
+        batch_size = tf.shape(behavior_embedding)[0]
+
+        mask = tf.reshape(tf.sequence_mask(seq_len, self.max_len, tf.float32), [-1, self.max_len, 1, 1])
+
+        behavior_embedding_mapping = tf.tensordot(behavior_embedding, self.bilinear_mapping_matrix, axes=1)
+        behavior_embedding_mapping = tf.expand_dims(behavior_embedding_mapping, axis=2)
+
+        behavior_embdding_mapping_ = tf.stop_gradient(behavior_embedding_mapping)  # N,max_len,1,E
+        try:
+            routing_logits = tf.truncated_normal([batch_size, self.max_len, self.k_max, 1], stddev=self.init_std)
+        except AttributeError:
+            routing_logits = tf.compat.v1.truncated_normal([batch_size, self.max_len, self.k_max, 1],
+                                                           stddev=self.init_std)
+        routing_logits = tf.stop_gradient(routing_logits)
+
+        k_user = None
+        if len(inputs) == 3:
+            k_user = inputs[2]
+            interest_mask = tf.sequence_mask(k_user, self.k_max, tf.float32)
+            interest_mask = tf.reshape(interest_mask, [batch_size, 1, self.k_max, 1])
+            interest_mask = tf.tile(interest_mask, [1, self.max_len, 1, 1])
+
+            interest_padding = tf.ones_like(interest_mask) * -2 ** 31
+            interest_mask = tf.cast(interest_mask, tf.bool)

        for i in range(self.iteration_times):
-            mask = tf.sequence_mask(seq_len_tile, self.max_len)
-            pad = tf.ones_like(mask, dtype=tf.float32) * (-2 ** 32 + 1)
-            routing_logits_with_padding = tf.where(mask, tf.tile(self.routing_logits, [batch_size, 1, 1]), pad)
-            weight = tf.nn.softmax(routing_logits_with_padding)
-            behavior_embdding_mapping = tf.tensordot(behavior_embddings, self.bilinear_mapping_matrix, axes=1)
-            Z = tf.matmul(weight, behavior_embdding_mapping)
-            interest_capsules = squash(Z)
-            delta_routing_logits = reduce_sum(
-                tf.matmul(interest_capsules, tf.transpose(behavior_embdding_mapping, perm=[0, 2, 1])),
-                axis=0, keep_dims=True
-            )
-            self.routing_logits.assign_add(delta_routing_logits)
+            if k_user is not None:
+                routing_logits = tf.where(interest_mask, routing_logits, interest_padding)
+            try:
+                weight = softmax(routing_logits, 2) * mask
+            except TypeError:
+                weight = tf.transpose(softmax(tf.transpose(routing_logits, [0, 1, 3, 2])),
+                                      [0, 1, 3, 2]) * mask  # N,max_len,k_max,1
+            if i < self.iteration_times - 1:
+                Z = reduce_sum(tf.matmul(weight, behavior_embdding_mapping_), axis=1, keep_dims=True)  # N,1,k_max,E
+                interest_capsules = squash(Z)
+                delta_routing_logits = reduce_sum(
+                    interest_capsules * behavior_embdding_mapping_,
+                    axis=-1, keep_dims=True
+                )
+                routing_logits += delta_routing_logits
+            else:
+                Z = reduce_sum(tf.matmul(weight, behavior_embedding_mapping), axis=1, keep_dims=True)
+                interest_capsules = squash(Z)
+
        interest_capsules = tf.reshape(interest_capsules, [-1, self.k_max, self.out_units])
        return interest_capsules

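Note (not part of the patch): the rewritten call replaces the shared, trainable routing_logits variable with per-sample logits drawn from a truncated normal on each forward pass, applies stop_gradient to the routing inputs used in the intermediate iterations, and masks capsules beyond a user's interest count when a third input is given. A minimal sketch of one routing iteration on toy shapes, assuming TF2; all names below are illustrative:

import tensorflow as tf

def squash(z):
    sq = tf.reduce_sum(tf.square(z), axis=-1, keepdims=True)
    return sq / (1 + sq) / tf.sqrt(sq + 1e-9) * z

low = tf.random.normal([1, 2, 1, 3])     # N, max_len, 1, E   mapped behavior capsules
logits = tf.random.normal([1, 2, 2, 1])  # N, max_len, k_max, 1   routing logits
w = tf.nn.softmax(logits, axis=2)                                # route each behavior over k_max interests
Z = tf.reduce_sum(tf.matmul(w, low), axis=1, keepdims=True)      # N, 1, k_max, E
high = squash(Z)                                                 # interest capsules
logits += tf.reduce_sum(high * low, axis=-1, keepdims=True)      # agreement update
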
@@ -213,7 +240,7 @@ def get_config(self, ):


def squash(inputs):
    vec_squared_norm = reduce_sum(tf.square(inputs), axis=-1, keep_dims=True)
-    scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + 1e-8)
+    scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + 1e-9)
    vec_squashed = scalar_factor * inputs
    return vec_squashed

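Note (not part of the patch): squash rescales each capsule vector to length ||v||**2 / (1 + ||v||**2) while preserving its direction, so the epsilon change from 1e-8 to 1e-9 only affects numerical stability for near-zero vectors. A quick check, assuming TF2 eager execution:

import tensorflow as tf

v = tf.constant([[3., 4.]])                                   # norm 5
sq = tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)      # 25
out = sq / (1 + sq) / tf.sqrt(sq + 1e-9) * v
print(tf.norm(out, axis=-1))                                  # ~0.9615 == 25 / 26
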
@@ -235,3 +262,27 @@ def get_config(self, ):
        config = {'index': self.index, }
        base_config = super(EmbeddingIndex, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
+
+
+class MaskUserEmbedding(Layer):
+
+    def __init__(self, k_max, **kwargs):
+        self.k_max = k_max
+        super(MaskUserEmbedding, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        super(MaskUserEmbedding, self).build(
+            input_shape)  # Be sure to call this somewhere!
+
+    def call(self, x, training=None, **kwargs):
+        user_embedding, interest_num = x
+        if not training:
+            interest_mask = tf.sequence_mask(interest_num, self.k_max, tf.float32)
+            interest_mask = tf.reshape(interest_mask, [-1, self.k_max, 1])
+            user_embedding *= interest_mask
+        return user_embedding
+
+    def get_config(self, ):
+        config = {'k_max': self.k_max, }
+        base_config = super(MaskUserEmbedding, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))