forked from scverse/scanpy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_trimap.py
More file actions
143 lines (127 loc) · 4.4 KB
/
_trimap.py
File metadata and controls
143 lines (127 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""Embed high-dimensional data using TriMap."""
from __future__ import annotations
from typing import TYPE_CHECKING
from ... import logging as logg
from ..._compat import CSBase, old_positionals
from ..._settings import settings
from ..._utils._doctests import doctest_needs
if TYPE_CHECKING:
from typing import Literal
from anndata import AnnData
@old_positionals(
    "n_inliers",
    "n_outliers",
    "n_random",
    "metric",
    "weight_adj",
    "lr",
    "n_iters",
    "verbose",
    "copy",
)
@doctest_needs("trimap")
def trimap(  # noqa: PLR0913
    adata: AnnData,
    n_components: int = 2,
    *,
    n_inliers: int = 10,
    n_outliers: int = 5,
    n_random: int = 5,
    metric: Literal["angular", "euclidean", "hamming", "manhattan"] = "euclidean",
    weight_adj: float = 500.0,
    lr: float = 1000.0,
    n_iters: int = 400,
    verbose: bool | int | None = None,
    copy: bool = False,
) -> AnnData | None:
    """TriMap: Large-scale Dimensionality Reduction Using Triplets :cite:p:`Amid2019`.

    TriMap is a dimensionality reduction method that uses triplet constraints
    to form a low-dimensional embedding of a set of points. The triplet
    constraints are of the form "point i is closer to point j than point k".
    The triplets are sampled from the high-dimensional representation of the
    points and a weighting scheme is used to reflect the importance of each
    triplet.

    TriMap provides a significantly better global view of the data than the
    other dimensionality reduction methods such as t-SNE, LargeVis, and UMAP.
    The global structure includes relative distances of the clusters, multiple
    scales in the data, and the existence of possible outliers. We define a
    global score to quantify the quality of an embedding in reflecting the
    global structure of the data.

    Parameters
    ----------
    adata
        Annotated data matrix.
        If `adata.obsm["X_pca"]` exists, its first (at most 100) components
        are used as input; otherwise `adata.X` is used, which must be dense.
    n_components
        Number of dimensions of the embedding.
    n_inliers
        Number of inlier points for triplet constraints.
    n_outliers
        Number of outlier points for triplet constraints.
    n_random
        Number of random triplet constraints per point.
    metric
        Distance measure: 'angular', 'euclidean', 'hamming', 'manhattan'.
    weight_adj
        Adjusting the weights using a non-linear transformation.
    lr
        Learning rate.
    n_iters
        Number of iterations.
    verbose
        If `True`, print the progress report.
        If `None`, `sc.settings.verbosity` is used.
    copy
        Return a copy instead of writing to `adata`.

    Returns
    -------
    Depending on `copy`, returns or updates `adata` with the following fields.

    **X_trimap** : :class:`~numpy.ndarray`, (:attr:`~anndata.AnnData.obsm`, shape=(n_samples, n_components), dtype `float`)
        TriMap coordinates of data.

    Raises
    ------
    ImportError
        If the `trimap` package is not installed.
    ValueError
        If `X_pca` is absent from `adata.obsm` and `adata.X` is sparse.

    Example
    -------
    >>> import scanpy as sc
    >>> import scanpy.external as sce
    >>> pbmc = sc.datasets.pbmc68k_reduced()
    >>> pbmc = sce.tl.trimap(pbmc, copy=True)
    >>> sce.pl.trimap(pbmc, color=["bulk_labels"], s=10)

    """
    try:
        from trimap import TRIMAP
    except ImportError as e:
        e.add_note("Please install `trimap` and try again.")
        raise

    # Copy exactly once; a second copy would only waste time and memory.
    adata = adata.copy() if copy else adata
    start = logg.info("computing TriMap")

    # Resolve the effective verbosity into the boolean flag TRIMAP receives:
    # a bool is taken as is, any other level counts as verbose when > 0.
    verbosity = settings.verbosity if verbose is None else verbose
    show_progress = verbosity if isinstance(verbosity, bool) else verbosity > 0

    if "X_pca" in adata.obsm:
        # Prefer the PCA representation, capped at the first 100 components.
        n_dim_pca = adata.obsm["X_pca"].shape[1]
        x = adata.obsm["X_pca"][:, : min(n_dim_pca, 100)]
    else:
        x = adata.X
        if isinstance(x, CSBase):
            # Fail early with an actionable message instead of handing a
            # sparse matrix to trimap.
            msg = (
                "trimap currently does not support sparse matrices. Please "
                "use a dense matrix or apply pca first."
            )
            raise ValueError(msg)
        logg.warning("`X_pca` not found. Run `sc.pp.pca` first for speedup.")

    x_trimap = TRIMAP(
        n_dims=n_components,
        n_inliers=n_inliers,
        n_outliers=n_outliers,
        n_random=n_random,
        lr=lr,
        distance=metric,
        weight_adj=weight_adj,
        n_iters=n_iters,
        verbose=show_progress,
    ).fit_transform(x)
    adata.obsm["X_trimap"] = x_trimap

    logg.info(
        "    finished",
        time=start,
        deep="added\n    'X_trimap', TriMap coordinates (adata.obsm)",
    )
    return adata if copy else None