forked from scverse/anndataR
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.R
More file actions
316 lines (290 loc) · 8.18 KB
/
utils.R
File metadata and controls
316 lines (290 loc) · 8.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
wrap_message <- function(...) {
txt <- paste0(..., collapse = "")
paste(strwrap(txt, exdent = 2L), collapse = "\n")
}
has_row_names <- function(x) {
if (is.data.frame(x)) {
.row_names_info(x) > 0
} else {
!is.null(dimnames(x)[[1]])
}
}
get_shape <- function(obs, var, X, shape) {
n_obs <-
if (!is.null(obs)) {
nrow(obs)
} else if (!is.null(X)) {
nrow(X)
} else if (!is.null(shape)) {
shape[[1]]
} else {
0L
}
n_vars <-
if (!is.null(var)) {
nrow(var)
} else if (!is.null(X)) {
ncol(X)
} else if (!is.null(shape)) {
shape[[2]]
} else {
0L
}
c(n_obs, n_vars)
}
get_initial_obs <- function(obs, X, shape) {
if (is.null(obs)) {
obs <- data.frame(matrix(NA, nrow = shape[[1]], ncol = 0))
if (!is.null(X)) {
rownames(obs) <- rownames(X)
}
}
obs
}
get_initial_var <- function(var, X, shape) {
if (is.null(var)) {
var <- data.frame(matrix(NA, nrow = shape[[2]], ncol = 0))
if (!is.null(X)) {
rownames(var) <- colnames(X)
}
}
var
}
to_py_matrix <- function(mat) {
if (inherits(mat, "dgCMatrix")) {
mat <- as(mat, "RsparseMatrix")
} else if (!inherits(mat, "dgRMatrix")) {
mat <- as.matrix(mat)
}
Matrix::t(mat)
}
# nolint start: object_name_linter
to_R_matrix <- function(mat) {
# nolint end: object_name_linter
if (inherits(mat, "dgRMatrix")) {
mat <- as(mat, "CsparseMatrix")
} else if (!inherits(mat, "dgCMatrix")) {
mat <- as.matrix(mat)
}
Matrix::t(mat)
}
self_name <- function(x) {
if (is.null(names(x))) {
x <- setNames(x, x)
} else if (!all(nzchar(names(x)))) {
is_missing <- !nzchar(names(x))
names(x)[is_missing] <- x[is_missing]
}
x
}
#' Get mapping
#'
#' Get a mapping argument for a conversion function
#'
#' @param mapping The user-supplied mapping argument. Can be a named vector,
#' `TRUE` or `FALSE`.
#' @param guesser A function that guesses the default mapping from `obj` if
#' `mapping` is `TRUE`
#' @param obj The object that is being converted and is passed to `guesser`
#' if needed
#' @param name The name of the mapping argument, used for error messages
#' @param ... Additional arguments passed to `guesser`
#'
#' @description
#' If `mapping` is `NULL` or empty it is set to `FALSE` with a warning. `FALSE`
#' values return an empty mapping.
#'
#' @returns A named mapping vector
#' @noRd
get_mapping <- function(mapping, guesser, obj, name, ...) {
if (rlang::is_empty(mapping)) {
cli_warn(c(
"The {.arg {name}} argument is empty, setting it to {.val {FALSE}}"
))
mapping <- FALSE
}
# If FALSE, return an empty mapping
if (isFALSE(mapping)) {
return(list())
}
# If TRUE, use the guesser function to get the default mapping
if (isTRUE(mapping)) {
return(guesser(obj, ...))
}
if (!is.vector(mapping)) {
cli_abort(paste(
"{.arg {name}} must be a vector, {.val {TRUE}} or {.val {FALSE}}, not",
"{.cls {class(mapping)}}"
))
}
# Make sure provided mapping has names
self_name(mapping)
}
#' Check dimensions and skip
#'
#' Check the dimensions of a matrix-like object and return `NULL` if they do not
#' match the expected dimensions, with a warning. For use in conversion
#' functions to skip items that do not match the required dimensions.
#'
#' @param x The object to check
#' @param field The field the object comes from, used in the warning message
#' @param name The name of the object, used in the warning message
#' @param expected_dims Expected dimensions
#' @param expected_rows Expected number of rows
#' @param expected_cols Expected number of columns
#'
#' @returns The object `x` if it matches the expected dimensions, otherwise
#' `NULL`
#' @noRd
check_dims_and_skip <- function(
x,
field,
name,
expected_dims = NULL,
expected_rows = NULL,
expected_cols = NULL
) {
msg <- NULL
if (!is.null(expected_dims) && !identical(dim(x), expected_dims)) {
expected_dims <- as.integer(expected_dims)
msg <- c(
"i" = paste0(
"Expected [{style_vec(expected_dims)}], ",
"got [{style_vec(as.integer(dim(x)))}]"
)
)
} else if (!is.null(expected_rows) && nrow(x) != expected_rows) {
msg <- c(
"i" = paste0(
"Expected {.val {expected_rows}} rows, got {.val {nrow(x)}}"
)
)
} else if (!is.null(expected_cols) && ncol(x) != expected_cols) {
msg <- c(
"i" = paste0(
"Expected {.val {expected_cols}} colums, got {.val {ncol(x)}}"
)
)
}
if (!is.null(msg)) {
cli_warn(
c(
"Skipping {.field {field}} {.val {name}} with unexpected dimensions",
msg
),
call = NULL
)
NULL
} else {
x
}
}
#' Warn matrix dim names not writeable
#'
#' Warn that matrix dim names can not be written to a given object
#'
#' @param mat The object to check, if not a matrix-like object nothing is
#' checked
#' @param label A label for `mat` to use in warning messages
#' @param to_object The object to which `mat` would be written
#' @param rows Whether to check row names
#' @param cols Whether to check column names
#'
#' @returns `NULL`, invisibly
#' @noRd
# nolint start: object_name_linter
warn_matrix_dimnames_not_writeable <- function(
mat,
label,
to_object,
rows = TRUE,
cols = TRUE
) {
# nolint end: object_name_linter
if (!is.matrix(mat) && !inherits(mat, "Matrix")) {
return(invisible())
}
if (rows && !is.null(rownames(mat))) {
cli_warn(
c(
paste(
"Matrix row names cannot be written to {.obj_type_friendly {to_object}},",
"they will be lost"
),
"i" = paste(
"To write row names for {.field {label}}, store it as",
"{.cls data.frame} instead of {.obj_type_friendly {mat}}"
),
"i" = "{.strong NOTE:} {.field obs_names} and {.field var_names} are stored separately"
),
call = NULL
)
}
if (cols && !is.null(colnames(mat))) {
cli_warn(
c(
paste(
"Matrix column names cannot be written to {.obj_type_friendly {to_object}},",
"they will be lost"
),
"i" = paste(
"To write column names for {.field {label}}, store it as",
"{.cls data.frame} instead of {.obj_type_friendly {mat}}"
),
"i" = "{.strong NOTE:} {.field obs_names} and {.field var_names} are stored separately"
),
call = NULL
)
}
invisible()
}
#' Construct a sparse matrix from CSR/CSC components
#'
#' Build a `dgCMatrix` or `dgRMatrix` from raw data, index, and pointer vectors.
#'
#' @param data Non-zero values. Coerced to `double`.
#' @param indices Column indices (CSC) or row indices (CSR), 0-based. Coerced
#' to `integer`.
#' @param indptr Index pointers, 0-based. Coerced to `integer`.
#' @param shape Matrix dimensions. Coerced to `integer`.
#' @param type Either `"csc_matrix"` or `"csr_matrix"`.
#'
#' @return A `dgCMatrix` (CSC) or `dgRMatrix` (CSR).
#'
#' @noRd
construct_sparse_matrix <- function(
data,
indices,
indptr,
shape,
type = c("csc_matrix", "csr_matrix")
) {
type <- match.arg(type)
data <- as.double(data)
indices <- as.integer(indices)
indptr <- as.integer(indptr)
shape <- as.integer(shape)
# The Matrix package validity checks require that indices are sorted within
# each major axis group (row indices within columns for CSC, column indices
# within rows for CSR). For sparse matrices in Python order isn't guaranteed,
# so we sort if needed.
if (length(indices) > 1L) {
row_lengths <- diff(indptr)
group_ids <- rep.int(seq_along(row_lengths), row_lengths)
ord <- order(group_ids, indices)
if (is.unsorted(ord)) {
indices <- indices[ord]
data <- data[ord]
}
}
if (type == "csc_matrix") {
# Directly construct dgCMatrix (CSC format) to avoid overhead of constructing
# a general sparseMatrix and then coercing to dgCMatrix
# Slots: i = row indices (0-based), p = col pointers, x = values, Dim
new("dgCMatrix", i = indices, p = indptr, x = data, Dim = shape)
} else if (type == "csr_matrix") {
# Directly construct dgRMatrix (CSR format)
# Slots: j = column indices (0-based), p = row pointers, x = values, Dim
new("dgRMatrix", j = indices, p = indptr, x = data, Dim = shape)
}
}