% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/dimensional_reduction.R
\name{RunUMAP}
\alias{RunUMAP}
\alias{RunUMAP.default}
\alias{RunUMAP.Graph}
\alias{RunUMAP.Neighbor}
\alias{RunUMAP.Seurat}
\title{Run UMAP}
\usage{
RunUMAP(object, ...)

\method{RunUMAP}{default}(
  object,
  reduction.key = "UMAP_",
  assay = NULL,
  reduction.model = NULL,
  return.model = FALSE,
  umap.method = "uwot",
  n.neighbors = 30L,
  n.components = 2L,
  metric = "cosine",
  n.epochs = NULL,
  learning.rate = 1,
  min.dist = 0.3,
  spread = 1,
  set.op.mix.ratio = 1,
  local.connectivity = 1L,
  repulsion.strength = 1,
  negative.sample.rate = 5,
  a = NULL,
  b = NULL,
  uwot.sgd = FALSE,
  seed.use = 42,
  metric.kwds = NULL,
  angular.rp.forest = FALSE,
  densmap = FALSE,
  dens.lambda = 2,
  dens.frac = 0.3,
  dens.var.shift = 0.1,
  verbose = TRUE,
  ...
)

\method{RunUMAP}{Graph}(
  object,
  assay = NULL,
  umap.method = "umap-learn",
  n.components = 2L,
  metric = "correlation",
  n.epochs = 0L,
  learning.rate = 1,
  min.dist = 0.3,
  spread = 1,
  repulsion.strength = 1,
  negative.sample.rate = 5L,
  a = NULL,
  b = NULL,
  uwot.sgd = FALSE,
  seed.use = 42L,
  metric.kwds = NULL,
  densmap = FALSE,
  densmap.kwds = NULL,
  verbose = TRUE,
  reduction.key = "UMAP_",
  ...
)

\method{RunUMAP}{Neighbor}(object, reduction.model, ...)

\method{RunUMAP}{Seurat}(
  object,
  dims = NULL,
  reduction = "pca",
  features = NULL,
  graph = NULL,
  assay = DefaultAssay(object = object),
  nn.name = NULL,
  slot = "data",
  umap.method = "uwot",
  reduction.model = NULL,
  return.model = FALSE,
  n.neighbors = 30L,
  n.components = 2L,
  metric = "cosine",
  n.epochs = NULL,
  learning.rate = 1,
  min.dist = 0.3,
  spread = 1,
  set.op.mix.ratio = 1,
  local.connectivity = 1L,
  repulsion.strength = 1,
  negative.sample.rate = 5L,
  a = NULL,
  b = NULL,
  uwot.sgd = FALSE,
  seed.use = 42L,
  metric.kwds = NULL,
  angular.rp.forest = FALSE,
  densmap = FALSE,
  dens.lambda = 2,
  dens.frac = 0.3,
  dens.var.shift = 0.1,
  verbose = TRUE,
  reduction.name = "umap",
  reduction.key = NULL,
  ...
)
}
\arguments{
\item{object}{An object}
\item{...}{Arguments passed to other methods and UMAP}
\item{reduction.key}{Dimensional reduction key; specifies the string before
the number in the dimension names (\code{UMAP_} by default)}
\item{assay}{Assay to pull data for when using \code{features}, or assay used to construct Graph
if running UMAP on a Graph}
\item{reduction.model}{\code{DimReduc} object that contains the umap model}
\item{return.model}{whether UMAP will return the uwot model}
\item{umap.method}{UMAP implementation to run. Can be
\describe{
\item{\code{uwot}:}{Runs umap via the uwot R package (\code{uwot::umap})}
\item{\code{uwot2}:}{Runs umap2 via the uwot R package (\code{uwot::umap2})}
\item{\code{umap-learn}:}{Run the Seurat wrapper of the python umap-learn package}
}}
\item{n.neighbors}{This determines the number of neighboring points used in
local approximations of manifold structure. Larger values will result in more
global structure being preserved at the loss of detailed local structure. In
general this parameter should often be in the range 5 to 50.}
\item{n.components}{The dimension of the space to embed into.}
\item{metric}{This determines the choice of metric used to measure
distance in the input space. A wide variety of metrics are already coded, and
a user defined function can be passed as long as it has been JIT'd by numba.}
\item{n.epochs}{The number of training epochs to be used in optimizing the low dimensional
embedding. Larger values result in more accurate embeddings. If NULL is specified, a value will
be selected based on the size of the input dataset (200 for large datasets, 500 for small).}
\item{learning.rate}{The initial learning rate for the embedding optimization.}
\item{min.dist}{This controls how tightly the embedding is allowed to compress points together.
Larger values ensure embedded points are more evenly distributed, while smaller values allow the
algorithm to optimize more accurately with regard to local structure. Sensible values are in
the range 0.001 to 0.5.}
\item{spread}{The effective scale of embedded points. In combination with min.dist this
determines how clustered/clumped the embedded points are.}
\item{set.op.mix.ratio}{Interpolate between (fuzzy) union and intersection as the set operation
used to combine local fuzzy simplicial sets to obtain a global fuzzy simplicial set. Both fuzzy
set operations use the product t-norm. The value of this parameter should be between 0.0 and
1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.}
\item{local.connectivity}{The local connectivity required - i.e. the number of nearest neighbors
that should be assumed to be connected at a local level. The higher this value, the more connected
the manifold becomes locally. In practice this should not be more than the local intrinsic
dimension of the manifold.}
\item{repulsion.strength}{Weighting applied to negative samples in low dimensional embedding
optimization. Values higher than one will result in greater weight being given to negative
samples.}
\item{negative.sample.rate}{The number of negative samples to select per positive sample in the
optimization process. Increasing this value will result in greater repulsive force being applied
and greater optimization cost, but slightly more accuracy.}
\item{a}{More specific parameters controlling the embedding. If NULL, these values are set
automatically as determined by min.dist and spread. Parameter of differentiable approximation of
right adjoint functor.}
\item{b}{More specific parameters controlling the embedding. If NULL, these values are set
automatically as determined by min.dist and spread. Parameter of differentiable approximation of
right adjoint functor.}
\item{uwot.sgd}{Set \code{uwot::umap(fast_sgd = TRUE)}; see \code{\link[uwot]{umap}} for more details}
\item{seed.use}{Set a random seed. By default, sets the seed to 42. Setting
NULL will not set a seed}
\item{metric.kwds}{A dictionary of arguments to pass on to the metric, such as the p value for
Minkowski distance. If NULL then no arguments are passed on.}
\item{angular.rp.forest}{Whether to use an angular random projection forest to initialize the
approximate nearest neighbor search. This can be faster, but is mostly only useful for metrics that
use an angular style distance such as cosine, correlation etc. In the case of those metrics,
angular forests will be chosen automatically.}
\item{densmap}{Whether to use the density-augmented objective of densMAP.
Turning on this option generates an embedding where the local densities
are encouraged to be correlated with those in the original space.
Parameters below with the prefix 'dens' further control the behavior
of this extension. Default is FALSE. Only compatible with the 'umap-learn'
method and umap-learn version >= 0.5.0}
\item{dens.lambda}{Specific parameter which controls the regularization weight
of the density correlation term in densMAP. Higher values prioritize density
preservation over the UMAP objective, and vice versa for values closer to zero.
Setting this parameter to zero is equivalent to running the original UMAP algorithm.
Default value is 2.}
\item{dens.frac}{Specific parameter which controls the fraction of epochs
(between 0 and 1) where the density-augmented objective is used in densMAP.
The first (1 - dens_frac) fraction of epochs optimize the original UMAP
objective before introducing the density correlation term. Default is 0.3.}
\item{dens.var.shift}{Specific parameter which specifies a small constant
added to the variance of local radii in the embedding when calculating
the density correlation objective to prevent numerical instability from
dividing by a small number. Default is 0.1.}
\item{verbose}{Controls verbosity}
\item{densmap.kwds}{A dictionary of arguments to pass on to the densMAP optimization.}
\item{dims}{Which dimensions to use as input features, used only if
\code{features} is NULL}
\item{reduction}{Which dimensional reduction (PCA or ICA) to use for the
UMAP input. Default is PCA}
\item{features}{If set, run UMAP on this subset of features (instead of running on a
set of reduced dimensions). Not set (NULL) by default; \code{dims} must be NULL to run
on features}
\item{graph}{Name of graph on which to run UMAP}
\item{nn.name}{Name of knn output on which to run UMAP}
\item{slot}{The slot to pull data from when using \code{features}; defaults to the \code{data} slot.}
\item{reduction.name}{Name to store dimensional reduction under in the Seurat object}
}
\value{
Returns a Seurat object containing a UMAP representation
}
\description{
Runs the Uniform Manifold Approximation and Projection (UMAP) dimensional
reduction technique. To run using \code{umap.method="umap-learn"}, you must
first install the umap-learn python package (e.g. via
\code{pip install umap-learn}). Details on this package can be
found here: \url{https://github.com/lmcinnes/umap}. For a more in-depth
discussion of the mathematics underlying UMAP, see the arXiv paper here:
\url{https://arxiv.org/abs/1802.03426}.
}
\examples{
\dontrun{
data("pbmc_small")
pbmc_small
# Run UMAP on the first 5 PCs
pbmc_small <- RunUMAP(object = pbmc_small, dims = 1:5)
# Plot results
DimPlot(object = pbmc_small, reduction = 'umap')
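# The calls below are illustrative sketches, not part of the original example;
# the parameter names come from the argument list above and the values are
# arbitrary.
# Keep the fitted uwot model alongside the embedding so new data can later be
# projected onto it by supplying the stored reduction to `reduction.model`.
pbmc_small <- RunUMAP(object = pbmc_small, dims = 1:5, return.model = TRUE)
# Trade local detail for global structure: more neighbors and a larger
# min.dist give a smoother, more evenly spread layout.
pbmc_small <- RunUMAP(object = pbmc_small, dims = 1:5, n.neighbors = 20L, min.dist = 0.5)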
}
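\dontrun{
# A hedged sketch assuming the umap-learn python package is installed
# (e.g. via `pip install umap-learn`; see Description). This runs the python
# implementation instead of uwot; the metric value is illustrative only.
pbmc_small <- RunUMAP(
  object = pbmc_small,
  dims = 1:5,
  umap.method = "umap-learn",
  metric = "correlation"
)
}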
}
\references{
McInnes, L, Healy, J, UMAP: Uniform Manifold Approximation and
Projection for Dimension Reduction, ArXiv e-prints 1802.03426, 2018
}
\concept{dimensional_reduction}