Skip to content

Commit 57c41fa

Browse files
committed
Add additional input checks to catch frequently occurring input errors
1 parent 9c58aba commit 57c41fa

5 files changed

Lines changed: 161 additions & 2 deletions

File tree

R/muscat_de.R

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,20 +86,37 @@ perform_muscat_de_analysis = function(sce, sample_id, celltype_id, group_id, bat
8686
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
8787
stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
8888
}
89+
} else{
90+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
91+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
92+
stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
93+
}
8994
}
95+
9096
if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
9197
is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
9298
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
9399
stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
94100
}
101+
} else{
102+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
103+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
104+
stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
105+
}
95106
}
96107
if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
97108
is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
98109
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
99110
stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
100111
}
112+
} else{
113+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
114+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
115+
stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
116+
}
101117
}
102118

119+
103120
if(!is.character(contrasts)){
104121
stop("contrasts should be a character vector")
105122
}
@@ -173,9 +190,21 @@ perform_muscat_de_analysis = function(sce, sample_id, celltype_id, group_id, bat
173190
sid = "id", # sample IDs (ctrl/stim.1234)
174191
drop = FALSE) # drop all other SummarizedExperiment::colData columns ----------------- change to false
175192

193+
# test to see whether each sample_id belongs to only one group/condition
194+
if (sum(table(sce$sample_id, sce$group_id) %>% apply(1, function(row_oi){sum(row_oi > 0)}) > 1) > 0){
195+
stop("One or more of your sample_ids belongs to more than one group/condition of interest. Please make sure that all sample_ids are uniquely divided over your groups/conditions.")
196+
}
197+
176198
pb = muscat::aggregateData(sce,
177199
assay = assay_oi_pb, fun = fun_oi_pb,
178200
by = c("cluster_id", "sample_id"))
201+
202+
if(assay_oi_pb == "counts"){
203+
libsizes = colSums(SummarizedExperiment::assay(pb))
204+
if (!isTRUE(all(libsizes == floor(libsizes)))) {
205+
warning("non-integer library sizes: are you sure you are working with raw counts?")
206+
}
207+
}
179208

180209
# prepare the experiment info (ei) table if batches present
181210
if(length(batches) > 1){

R/pipeline_wrappers.R

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,42 @@ get_abundance_expression_info = function(sce, sample_id, group_id, celltype_id,
3333

3434
requireNamespace("dplyr")
3535
requireNamespace("ggplot2")
36+
37+
# if some of these are factors, and not all levels have syntactically valid names - prompt to change this
38+
if(is.factor(SummarizedExperiment::colData(sce)[,celltype_id])){
39+
is_make_names = levels(SummarizedExperiment::colData(sce)[,celltype_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,celltype_id]))
40+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
41+
stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
42+
}
43+
} else{
44+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
45+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
46+
stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
47+
}
48+
}
49+
50+
if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
51+
is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
52+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
53+
stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
54+
}
55+
} else{
56+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
57+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
58+
stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
59+
}
60+
}
61+
if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
62+
is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
63+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
64+
stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
65+
}
66+
} else{
67+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
68+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
69+
stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
70+
}
71+
}
3672

3773
### Receiver abundance plots
3874

@@ -218,6 +254,76 @@ get_abundance_expression_info_separate = function(sce_receiver, sce_sender, samp
218254
requireNamespace("dplyr")
219255
requireNamespace("ggplot2")
220256

257+
# if some of these are factors, and not all levels have syntactically valid names - prompt to change this
258+
if(is.factor(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver])){
259+
is_make_names = levels(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]) == make.names(levels(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]))
260+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]))){
261+
stop("The levels of the factor SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver] should be a syntactically valid R names - see make.names")
262+
}
263+
} else{
264+
is_make_names = unique(sort(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver])) == make.names(unique(sort(SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver])))
265+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver]))))){
266+
stop("All the cell type labels in SummarizedExperiment::colData(sce_receiver)[,celltype_id_receiver] should be syntactically valid R names - see make.names")
267+
}
268+
}
269+
270+
if(is.factor(SummarizedExperiment::colData(sce_receiver)[,group_id])){
271+
is_make_names = levels(SummarizedExperiment::colData(sce_receiver)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce_receiver)[,group_id]))
272+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_receiver)[,group_id]))){
273+
stop("The levels of the factor SummarizedExperiment::colData(sce_receiver)[,group_id] should be a syntactically valid R names - see make.names")
274+
}
275+
} else{
276+
is_make_names = unique(sort(SummarizedExperiment::colData(sce_receiver)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_receiver)[,group_id])))
277+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_receiver)[,group_id]))))){
278+
stop("All the group/condition labels in SummarizedExperiment::colData(sce_receiver)[,group_id] should be syntactically valid R names - see make.names")
279+
}
280+
}
281+
if(is.factor(SummarizedExperiment::colData(sce_receiver)[,sample_id])){
282+
is_make_names = levels(SummarizedExperiment::colData(sce_receiver)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce_receiver)[,sample_id]))
283+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_receiver)[,sample_id]))){
284+
stop("The levels of the factor SummarizedExperiment::colData(sce_receiver)[,sample_id] should be a syntactically valid R names - see make.names")
285+
}
286+
} else{
287+
is_make_names = unique(sort(SummarizedExperiment::colData(sce_receiver)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_receiver)[,sample_id])))
288+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_receiver)[,sample_id]))))){
289+
stop("All the sample_id labels in SummarizedExperiment::colData(sce_receiver)[,sample_id] should be syntactically valid R names - see make.names")
290+
}
291+
}
292+
# if some of these are factors, and not all levels have syntactically valid names - prompt to change this
293+
if(is.factor(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender])){
294+
is_make_names = levels(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]) == make.names(levels(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]))
295+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]))){
296+
stop("The levels of the factor SummarizedExperiment::colData(sce_sender)[,celltype_id_sender] should be a syntactically valid R names - see make.names")
297+
}
298+
} else{
299+
is_make_names = unique(sort(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender])) == make.names(unique(sort(SummarizedExperiment::colData(sce_sender)[,celltype_id_sender])))
300+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_sender)[,celltype_id_sender]))))){
301+
stop("All the cell type labels in SummarizedExperiment::colData(sce_sender)[,celltype_id_sender] should be syntactically valid R names - see make.names")
302+
}
303+
}
304+
305+
if(is.factor(SummarizedExperiment::colData(sce_sender)[,group_id])){
306+
is_make_names = levels(SummarizedExperiment::colData(sce_sender)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce_sender)[,group_id]))
307+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_sender)[,group_id]))){
308+
stop("The levels of the factor SummarizedExperiment::colData(sce_sender)[,group_id] should be a syntactically valid R names - see make.names")
309+
}
310+
} else{
311+
is_make_names = unique(sort(SummarizedExperiment::colData(sce_sender)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_sender)[,group_id])))
312+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_sender)[,group_id]))))){
313+
stop("All the group/condition labels in SummarizedExperiment::colData(sce_sender)[,group_id] should be syntactically valid R names - see make.names")
314+
}
315+
}
316+
if(is.factor(SummarizedExperiment::colData(sce_sender)[,sample_id])){
317+
is_make_names = levels(SummarizedExperiment::colData(sce_sender)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce_sender)[,sample_id]))
318+
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce_sender)[,sample_id]))){
319+
stop("The levels of the factor SummarizedExperiment::colData(sce_sender)[,sample_id] should be a syntactically valid R names - see make.names")
320+
}
321+
} else{
322+
is_make_names = unique(sort(SummarizedExperiment::colData(sce_sender)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce_sender)[,sample_id])))
323+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce_sender)[,sample_id]))))){
324+
stop("All the sample_id labels in SummarizedExperiment::colData(sce_sender)[,sample_id] should be syntactically valid R names - see make.names")
325+
}
326+
}
221327
### Receiver plots and info
222328

223329
metadata_abundance = SummarizedExperiment::colData(sce_receiver)[,c(sample_id, group_id, celltype_id_receiver)] %>% tibble::as_tibble()
@@ -567,18 +673,34 @@ get_DE_info = function(sce, sample_id, group_id, celltype_id, batches, covariate
567673
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,celltype_id]))){
568674
stop("The levels of the factor SummarizedExperiment::colData(sce)[,celltype_id] should be a syntactically valid R names - see make.names")
569675
}
676+
} else{
677+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,celltype_id])))
678+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,celltype_id]))))){
679+
stop("All the cell type labels in SummarizedExperiment::colData(sce)[,celltype_id] should be syntactically valid R names - see make.names")
680+
}
570681
}
682+
571683
if(is.factor(SummarizedExperiment::colData(sce)[,group_id])){
572684
is_make_names = levels(SummarizedExperiment::colData(sce)[,group_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,group_id]))
573685
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,group_id]))){
574686
stop("The levels of the factor SummarizedExperiment::colData(sce)[,group_id] should be a syntactically valid R names - see make.names")
575687
}
688+
} else{
689+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,group_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,group_id])))
690+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,group_id]))))){
691+
stop("All the group/condition labels in SummarizedExperiment::colData(sce)[,group_id] should be syntactically valid R names - see make.names")
692+
}
576693
}
577694
if(is.factor(SummarizedExperiment::colData(sce)[,sample_id])){
578695
is_make_names = levels(SummarizedExperiment::colData(sce)[,sample_id]) == make.names(levels(SummarizedExperiment::colData(sce)[,sample_id]))
579696
if(sum(is_make_names) != length(levels(SummarizedExperiment::colData(sce)[,sample_id]))){
580697
stop("The levels of the factor SummarizedExperiment::colData(sce)[,sample_id] should be a syntactically valid R names - see make.names")
581698
}
699+
} else{
700+
is_make_names = unique(sort(SummarizedExperiment::colData(sce)[,sample_id])) == make.names(unique(sort(SummarizedExperiment::colData(sce)[,sample_id])))
701+
if(sum(is_make_names) != length(unique(sort((SummarizedExperiment::colData(sce)[,sample_id]))))){
702+
stop("All the sample_id labels in SummarizedExperiment::colData(sce)[,sample_id] should be syntactically valid R names - see make.names")
703+
}
582704
}
583705

584706
if(!is.character(contrasts_oi)){

README.Rmd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ When applying MultiNicheNet on datasets with many samples and cell types, it is
9595

9696
## Frequently recurring questions and issues
9797

98-
* Even though it is stated in the vignettes, many reported issues arise because names of celltypes, groups/conditions, and/or samples are not syntactically valid. Before reporting your issue, make sure you satisfy this condition and other conditions described in the vignettes.
98+
* Even though it is stated in the vignettes, many reported issues arise because names of celltypes, groups/conditions, and/or samples are not syntactically valid. Before reporting your issue, make sure you satisfy this condition and other conditions described in the vignettes. In the latest version of MultiNicheNet, input checks are run to check this and give an understandable error message.
99+
* It is required that each sample is uniquely assigned to only one condition/group of interest. See the vignettes about paired and multifactorial analysis to see how to define your analysis input when you have multiple samples and conditions per patient. In the latest version of MultiNicheNet, input checks are run to check this and give an understandable error message.
99100
* We strongly recommend having at least 4 samples in each of the groups/conditions you want to compare. With fewer samples, the benefits of performing a pseudobulk-based DE analysis are less clear and non-multi-sample tools for differential cell-cell communication might be better alternatives.
100101

101102
## References

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,14 @@ plots; and 2) interpreting the results and generating visualizations.
190190
arise because names of celltypes, groups/conditions, and/or samples
191191
are not syntactically valid. Before reporting your issue, make sure
192192
you satisfy this condition and other conditions described in the
193-
vignettes.
193+
vignettes. In the latest version of MultiNicheNet, input checks are
194+
run to check this and give an understandable error message.
195+
- It is required that each sample is uniquely assigned to only one
196+
condition/group of interest. See the vignettes about paired and
197+
multifactorial analysis to see how to define your analysis input
198+
when you have multiple samples and conditions per patient. In the
199+
latest version of MultiNicheNet, input checks are run to check this
200+
and give an understandable error message.
194201
- We strongly recommend having at least 4 samples in each of the
195202
groups/conditions you want to compare. With fewer samples, the
196203
benefits of performing a pseudobulk-based DE analysis are less clear

tests/testthat/Rplots.pdf

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)