Skip to content

Commit 9bdf431

Browse files
authored
parametrize the operator that joins cell QC filters (| or &) (#7)
* parametrize the operator that joins cell QC filters (| or &) * use AND/OR in docs * validate DATASET_SENSITIVE_FILTERS_OPERATOR and CUSTOM_FILTERS_OPERATOR * bump version, update README
1 parent 56d8bbe commit 9bdf431

12 files changed

Lines changed: 89 additions & 13 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: scdrake
22
Type: Package
33
Title: A pipeline for droplet-based single-cell RNA-seq data secondary analysis implemented in the drake Make-like toolkit for R language
4-
Version: 1.5.1
4+
Version: 1.5.2
55
Authors@R:
66
c(
77
person(

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ export(get_single_sample_plan)
100100
export(get_sys_env)
101101
export(get_tmp_dir)
102102
export(get_top_hvgs)
103+
export(get_used_qc_filters_operator_desc)
103104
export(get_yq_default_path)
104105
export(glue0)
105106
export(glue0c)

R/config_process_single_sample.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@
2929
msg = "{.var input_data$type} must be {.vals possible_input_data_types}. Current value: {.val {cfg$INPUT_DATA$type}}"
3030
)
3131

32+
possible_filters_operators <- c("&", "|")
33+
for (param_name in c("DATASET_SENSITIVE_FILTERS_OPERATOR", "CUSTOM_FILTERS_OPERATOR")) {
34+
val <- cfg[[param_name]]
35+
assert_that_(
36+
val %in% possible_filters_operators,
37+
msg = "{.field {param_name}} must be one of {.vals {possible_filters_operators}}. Current value: {.val {val}}"
38+
)
39+
}
40+
3241
assert_that_(
3342
!is_null(cfg$INPUT_DATA$path),
3443
msg = "{.field INPUT_DATA$path} is not set, data cannot be loaded later."

R/plans_single_sample.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ get_input_qc_subplan <- function(cfg, cfg_pipeline, cfg_main) {
6060
),
6161
qc_filters = purrr::map(qc_filters_raw, ~ as.logical(.) %>% tidyr::replace_na(replace = FALSE)),
6262
## -- Join filters by the operator configured in DATASET_SENSITIVE_FILTERS_OPERATOR ("&" or "|").
63-
qc_filter = Reduce("|", qc_filters),
63+
qc_filter = Reduce(!!cfg$DATASET_SENSITIVE_FILTERS_OPERATOR, qc_filters),
6464

6565
## -- Custom filters.
6666
custom_filters_raw = list(
@@ -71,7 +71,7 @@ get_input_qc_subplan <- function(cfg, cfg_pipeline, cfg_main) {
7171
# low_ribo = cell_qc$subsets_ribo_percent <= !!cfg$MIN_RIBO_RATIO * 100
7272
),
7373
custom_filters = purrr::map(custom_filters_raw, ~ as.logical(.) %>% tidyr::replace_na(replace = FALSE)),
74-
custom_filter = Reduce("|", custom_filters),
74+
custom_filter = Reduce(!!cfg$CUSTOM_FILTERS_OPERATOR, custom_filters),
7575

7676
## -- Add filters to sce and create Seurat object.
7777
sce_unfiltered = sce_add_colData(

R/single_sample_input_qc.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,19 @@ sce_final_input_qc_fn <- function(sce_selected, gene_annotation) {
263263

264264
return(sce_final_input_qc)
265265
}
266+
267+
#' Return an informative message about the used operator to join cell QC filters
#'
#' @param operator A character scalar: the operator used to join the individual
#'   cell QC filters, either `"&"` (AND) or `"|"` (OR). If omitted, the first
#'   choice (`"&"`) is used. Any other value raises an error.
#'
#' @return A character scalar: the message
#'
#' @concept single_sample_input_qc_fn
#' @export
get_used_qc_filters_operator_desc <- function(operator = c("&", "|")) {
  ## -- Resolve the enumerated default to a single value. Without this, calling
  ## -- the function with no argument would pass a length-two vector to `if ()`.
  operator <- match.arg(operator)

  if (operator == "&") {
    "Individual filters were considered jointly (using *AND* operator), i.e., a cell was removed only if violated all of the filters."
  } else {
    "Individual filters were considered individually (using *OR* operator), i.e., a cell was removed if violated at least one filter."
  }
}

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ You can pull the Docker image with the latest stable `{scdrake}` version
108108
using
109109

110110
``` bash
111-
docker pull jirinovo/scdrake:1.5.1
112-
singularity pull docker:jirinovo/scdrake:1.5.1
111+
docker pull jirinovo/scdrake:1.5.2
112+
singularity pull docker:jirinovo/scdrake:1.5.2
113113
```
114114

115115
or list available versions in [our Docker Hub
@@ -151,7 +151,7 @@ docker run -d \
151151
-e USERID=$(id -u) \
152152
-e GROUPID=$(id -g) \
153153
-e PASSWORD=1234 \
154-
jirinovo/scdrake:1.5.1
154+
jirinovo/scdrake:1.5.2
155155
```
156156

157157
For Singularity, also make shared directories and execute the container
@@ -234,7 +234,7 @@ for `{scdrake}` and you can use it to install all dependencies by
234234

235235
``` r
236236
## -- This is a lockfile for the latest stable version of scdrake.
237-
download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/1.5.1/renv.lock")
237+
download.file("https://raw.githubusercontent.com/bioinfocz/scdrake/1.5.2/renv.lock", destfile = "renv.lock")
238238
## -- You can increase the number of CPU cores to speed up the installation.
239239
options(Ncpus = 2)
240240
renv::restore(lockfile = "renv.lock", repos = BiocManager::repositories())
@@ -254,7 +254,7 @@ installed from the lockfile).
254254

255255
``` r
256256
remotes::install_github(
257-
"bioinfocz/scdrake@1.5.1",
257+
"bioinfocz/scdrake@1.5.2",
258258
dependencies = FALSE, upgrade = FALSE,
259259
keep_source = TRUE, build_vignettes = TRUE,
260260
repos = BiocManager::repositories()
@@ -321,7 +321,7 @@ vignette](https://bioinfocz.github.io/scdrake/articles/scdrake.html)
321321
## Vignettes and other readings
322322

323323
See <https://bioinfocz.github.io/scdrake> for a documentation website of
324-
the latest stable version (1.5.1) where links to vignettes below become
324+
the latest stable version (1.5.2) where links to vignettes below become
325325
real :-)
326326

327327
See <https://bioinfocz.github.io/scdrake/dev> for a documentation
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
Filter dataset based on custom (fixed) thresholds of QC metrics:
1+
Filter cells based on custom (fixed) thresholds of QC metrics:
22

33
- &lt;Min; Max&gt; UMI per cell: &lt;`r cfg$MIN_UMI_CF`; `r as.character(cfg$MAX_UMI_CF)`&gt;
44
- Min. number of features (genes) detected: `r cfg$MIN_FEATURES`
55
- Max. ratio of mitochondrial genes expression: `r cfg$MAX_MITO_RATIO`
66

7+
`r get_used_qc_filters_operator_desc(cfg$CUSTOM_FILTERS_OPERATOR)`
8+
79
**Removing `r sum(custom_filter)` low quality cells based on custom thresholds.**

inst/Rmd/single_sample/01_input_qc_children/cell_filtering_qc.Rmd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,6 @@ Filter cells based on QC metrics and MAD threshold (`r cfg$MAD_THRESHOLD`):
44
- Low number of detected genes (lower tail).
55
- High expression of mitochondrial genes (upper tail).
66

7+
`r get_used_qc_filters_operator_desc(cfg$DATASET_SENSITIVE_FILTERS_OPERATOR)`
8+
79
**Removing `r sum(qc_filter)` low quality cells based on MAD.**

inst/Rmd/single_sample/01_input_qc_children/gene_filtering_qc.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ janitor::tabyl(sce_qc_filter_rowSums == 0) %>%
1313
**Removing `r sum(drake::readd(sce_qc_gene_filter, path = drake_cache_dir))` genes with UMI per cell less than
1414
`r cfg$MIN_UMI` and expressed in less than `r cfg$MIN_RATIO_CELLS * 100` % of all cells.**
1515

16-
Info on filtered dataset:
16+
Info on the dataset after dataset-sensitive filtering:
1717

1818
```{r}
1919
cat(drake::readd(sce_qc_filter_genes_info, path = drake_cache_dir)$str)

inst/config/single_sample/01_input_qc.default.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@ SAVE_DATASET_SENSITIVE_FILTERING: True
3030

3131
### Dataset-sensitive cell filtering ##########################################
3232
MAD_THRESHOLD: 3
33+
DATASET_SENSITIVE_FILTERS_OPERATOR: "&"
3334
###############################################################################
3435

3536
### Custom cell filtering #####################################################
3637
MIN_UMI_CF: 1000
3738
MAX_UMI_CF: 50000
3839
MIN_FEATURES: 1000
3940
MAX_MITO_RATIO: 0.2
41+
CUSTOM_FILTERS_OPERATOR: "&"
4042
###############################################################################
4143

4244
### Gene filtering ############################################################

0 commit comments

Comments
 (0)