|
| 1 | +# Copyright 1999-2020 Alibaba Group Holding Ltd. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +import numpy as np |
| 16 | + |
| 17 | +from .... import opcodes as OperandDef |
| 18 | +from ....serialize import KeyField |
| 19 | +from .... import tensor as mt |
| 20 | +from ....tensor.core import TensorOrder |
| 21 | +from ....tensor.utils import recursive_tile |
| 22 | +from ...preprocessing import normalize |
| 23 | +from .core import PairwiseDistances |
| 24 | + |
| 25 | + |
class CosineDistances(PairwiseDistances):
    """Operand producing the pairwise cosine distance matrix of two inputs.

    The operand keeps its two inputs in the ``x`` and ``y`` key fields.
    Calling an instance builds the lazy result tensor; :meth:`tile`
    rewrites it as ``1 - cosine_similarity`` clipped to ``[0, 2]``.
    """

    _op_type_ = OperandDef.PAIRWISE_COSINE_DISTANCES

    # Serialized references to the two input tensors.
    _x = KeyField('x')
    _y = KeyField('y')

    def __init__(self, x=None, y=None, dtype=None, gpu=None, **kw):
        super().__init__(_x=x, _y=y, _dtype=dtype, _gpu=gpu, **kw)

    @property
    def x(self):
        """First input of the operand."""
        return self._x

    @property
    def y(self):
        """Second input of the operand."""
        return self._y

    def _set_inputs(self, inputs):
        """Store ``inputs`` via the base class, then re-bind ``x`` and ``y``
        to the first and second stored input respectively."""
        super()._set_inputs(inputs)
        self._x = self._inputs[0]
        self._y = self._inputs[1]

    def __call__(self, x, y=None):
        """Validate the inputs and create the output tensor.

        The output has one row per row of ``x`` and one column per row of
        ``y`` (as returned by ``check_pairwise_arrays``), in C order.
        """
        x, y = self.check_pairwise_arrays(x, y)
        out_shape = (x.shape[0], y.shape[0])
        return self.new_tensor([x, y], shape=out_shape,
                               order=TensorOrder.C_ORDER)

    @classmethod
    def tile(cls, op):
        """Express the distance through tensor operations and tile those.

        Returns a one-element list holding the tiled distance tensor.
        """
        x, y = op.x, op.y
        same_input = x is y

        if same_input:
            similarity = cosine_similarity(x)
        else:
            similarity = cosine_similarity(x, y)

        # distance = 1 - similarity, bounded to the [0, 2] interval.
        distance = mt.clip((similarity * -1) + 1, 0, 2)

        if same_input:
            # Distance of a sample to itself is forced to exactly zero.
            # The return value is unused, so this relies on fill_diagonal
            # updating ``distance`` in place.
            mt.fill_diagonal(distance, 0.0)

        return [recursive_tile(distance)]
| 65 | + |
| 66 | + |
def cosine_similarity(X, Y=None, dense_output=True):
    """Return the cosine similarity between the samples of X and Y.

    Cosine similarity (also called the cosine kernel) is the normalized
    dot product of X and Y:

        K(X, Y) = <X, Y> / (||X||*||Y||)

    For data that is already L2-normalized this is equivalent to
    linear_kernel.

    Read more in the :ref:`User Guide <cosine_similarity>`.

    Parameters
    ----------
    X : Tensor or sparse tensor, shape: (n_samples_X, n_features)
        Input data.

    Y : Tensor or sparse tensor, shape: (n_samples_Y, n_features)
        Input data. When ``None``, the similarities are computed between
        all pairs of samples in ``X``.

    dense_output : boolean (optional), default True
        Whether to return dense output even when the input is sparse. If
        ``False``, the output is sparse if both input tensors are sparse.

    Returns
    -------
    kernel matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    x_unit = normalize(X, copy=True)
    # When both inputs are the same object, normalize only once.
    y_unit = x_unit if X is Y else normalize(Y, copy=True)

    kernel = x_unit.dot(y_unit.T)
    return kernel.todense() if dense_output else kernel
| 109 | + |
| 110 | + |
def cosine_distances(X, Y=None):
    """Return the cosine distance between the samples of X and Y.

    The cosine distance is 1.0 minus the cosine similarity.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).

    See also
    --------
    mars.learn.metrics.pairwise.cosine_similarity
    mars.tensor.spatial.distance.cosine : dense matrices only
    """
    # The operand is always created with a float64 dtype.
    float64 = np.dtype(np.float64)
    distance_op = CosineDistances(x=X, y=Y, dtype=float64)
    return distance_op(X, y=Y)
0 commit comments