Skip to content

Commit c4bc630

Browse files
authored
Merge pull request #92 from LieberInstitute/spatial_registration_syntatic_check
Check var_registration is syntactically valid in spatial registration functions
2 parents 9eda069 + 94a99ac commit c4bc630

File tree

5 files changed

+102
-10
lines changed

5 files changed

+102
-10
lines changed

R/registration_pseudobulk.R

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
#' object or one that inherits its properties.
99
#' @param var_registration A `character(1)` specifying the `colData(sce)`
1010
#' variable of interest which will be used for computing the relevant
11-
#' statistics.
11+
#' statistics. This should be a categorical variable, with all categories
12+
#' syntactically valid (could be used as an R variable, no special characters or
13+
#' leading numbers), e.g. 'L1.2', 'celltype2', not 'L1/2' or '2'.
1214
#' @param var_sample_id A `character(1)` specifying the `colData(sce)` variable
1315
#' with the sample ID.
1416
#' @param covars A `character()` with names of sample-level covariates.
@@ -71,15 +73,22 @@ registration_pseudobulk <-
7173
stopifnot(!var_sample_id %in% covars)
7274
stopifnot(var_registration != var_sample_id)
7375

74-
## Check that the values in the registration variable are ok
76+
## Warn if the values in the registration variable are numeric
77+
if(is.numeric(sce[[var_registration]])){
78+
warning(sprintf("var_registration \"%s\" is numeric, convering to catagorial vector...",
79+
var_registration))
80+
}
81+
82+
## Check for non-syntactic values - convert with make.names() & warn
7583
uniq_var_regis <- unique(sce[[var_registration]])
76-
if (any(grepl("\\+|\\-", uniq_var_regis))) {
77-
stop(
78-
"Remove the + and - signs in colData(sce)[, '",
79-
var_registration,
80-
"'] to avoid downstream issues.",
81-
call. = FALSE
84+
syntatic <- grepl("^((([[:alpha:]]|[.][._[:alpha:]])[._[:alnum:]]*)|[.])$", uniq_var_regis)
85+
if (!all(syntatic)) {
86+
warning(sprintf("var_registration \"%s\" contains non-syntatic variables: %s\nconverting to %s",
87+
var_registration,
88+
paste(uniq_var_regis[!syntatic], collapse = ", "),
89+
paste(make.names(uniq_var_regis[!syntatic]), collapse = ", "))
8290
)
91+
sce[[var_registration]] <- make.names(sce[[var_registration]])
8392
}
8493

8594
## Pseudo-bulk for our current BayesSpace cluster results

man/registration_pseudobulk.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/registration_wrapper.Rd

Lines changed: 3 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-registration_pseudobulk.R

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,42 @@ test_that("NA check works", {
1111
"var_registration"
1212
)
1313
})
14+
15+
16+
#### Syntactic Variable Test ####
17+
set.seed(20220907) ## Ensure reproducibility of example data
18+
sce <- scuttle::mockSCE()
19+
## Add some sample IDs
20+
sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
21+
22+
## Add a sample-level covariate: age
23+
ages <- rnorm(5, mean = 20, sd = 4)
24+
names(ages) <- LETTERS[1:5]
25+
sce$age <- ages[sce$sample_id]
26+
27+
## add variable with one group
28+
sce$batch <- "batch1"
29+
30+
## non-syntactic inputs
31+
sce$cluster_int <- sample(1:4, ncol(sce), replace = TRUE)
32+
# sce$cluster_k <- paste0("k", sce$cluster_int)
33+
sce$cluster_j <- paste0(sce$cluster_int,"j")
34+
sce$cluster_l <- sample(c("L-1", "L2/3", "4L", "L5"), ncol(sce), replace = TRUE)
35+
36+
test_that("warn for numeric var_registration",
37+
expect_warning(registration_pseudobulk(sce,
38+
var_registration = "cluster_int",
39+
var_sample_id = "sample_id",
40+
covars = c("age"),
41+
min_ncells = NULL))
42+
)
43+
44+
45+
test_that("warn for non-syntactic var_registration",
46+
expect_warning(registration_pseudobulk(sce,
47+
var_registration = "cluster_l",
48+
var_sample_id = "sample_id",
49+
covars = c("age"),
50+
min_ncells = NULL))
51+
)
52+

tests/testthat/test-registration_wrapper.R

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,43 @@ test_that(
3434
)
3535
)
3636
)
37+
38+
#### Syntactic Variable Test ####
39+
40+
## categorical var as int
41+
sce$cluster_int <- sample(1:4, ncol(sce), replace = TRUE)
42+
# sce$cluster_k <- paste0("k", sce$cluster_int)
43+
sce$cluster_j <- paste0(sce$cluster_int,"j")
44+
sce$cluster_l <- sample(c("L-1", "L2/3", "4L", "L5"), ncol(sce), replace = TRUE)
45+
46+
table(sce$cluster_j)
47+
48+
test_that("Numeric var_regisration throws warning",
49+
expect_warning(registration_wrapper(
50+
sce,
51+
var_registration = "cluster_int",
52+
var_sample_id = "sample_id",
53+
covars = c("age"),
54+
gene_ensembl = "ensembl",
55+
gene_name = "gene_name",
56+
suffix = "wrapper"
57+
)))
58+
59+
test_that("Non-Syntactic thows warning",
60+
expect_warning(registration_wrapper(
61+
sce,
62+
var_registration = "cluster_l",
63+
var_sample_id = "sample_id",
64+
covars = c("age"),
65+
gene_ensembl = "ensembl",
66+
gene_name = "gene_name",
67+
suffix = "wrapper"
68+
)))
69+
70+
71+
72+
73+
74+
75+
76+

0 commit comments

Comments
 (0)