Skip to content

Commit c23a94e

Browse files
committed
Initial commit to add seurat analysis for multi pipeline
1 parent 366327b commit c23a94e

File tree

4 files changed

+287
-14
lines changed

4 files changed

+287
-14
lines changed

workflow/Snakefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Python standard library
22
from os.path import join
33
from os import listdir
4-
import os, sys, re, json
4+
import os, sys, re, json, glob
55

66
# 3rd party imports from pypi
77
from snakemake.workflow import workflow as wf_api

workflow/rules/multi.smk

Lines changed: 271 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
wildcard_constraints:
2+
sample="|".join(lib_samples)
3+
14
# Pipeline output definition
25

36
# Cell Ranger multi output
@@ -16,6 +19,10 @@ pipeline_output += expand(
1619
)
1720

1821

22+
#pipeline_output += [join(workpath, "Project_Cell_Filters.csv")]
23+
pipeline_output += expand(join(workpath, "finalreport", "seurat", "Summary_QC_Report.html"), sample=lib_samples)
24+
pipeline_output += expand(join(workpath, "seurat", "{sample}", "SeuratQCReportCopy_{sample}.complete"), sample=lib_samples)
25+
1926
# Get set of input paths
2027
input_paths = [os.path.dirname(p) for p in inputs]
2128
input_paths_set = []
@@ -76,6 +83,119 @@ def conditional_flags(wildcards):
7683

7784
return(' '.join(flags))
7885

86+
def filterFile(wildcards):
87+
"""
88+
Wrapper to decide whether to provide a filter file for Seurat
89+
QC analysis.
90+
See config['options']['filter'] for the encoded value.
91+
"""
92+
if filter_file == "None":
93+
return("")
94+
else:
95+
return(f"--filterfile {filter_file}")
96+
97+
def filterFileBool(wildcards):
98+
"""
99+
Wrapper to get if a filter file was provided
100+
See config['options']['filter'] for the encoded value.
101+
"""
102+
if filter_file == "None":
103+
return("TRUE")
104+
else:
105+
return("FALSE")
106+
107+
def metadataFile(wildcards):
108+
"""
109+
Wrapper to decide whether to provide a metadata file for Seurat
110+
QC analysis.
111+
See config['options']['metadata'] for the encoded value.
112+
"""
113+
if METADATA_FILE == "None":
114+
return("")
115+
else:
116+
return(f"--metadata {METADATA_FILE}")
117+
118+
119+
def cellLibraryFilterSummary_input(wildcards):
120+
# note 1: ck_output is the same as OUTDIR
121+
122+
# note 2: checkpoints will have attributes for each of the checkpoint
123+
# rules, accessible by name.
124+
#
125+
# outs = directory(join(workpath, "{sample}", "outs", "per_sample_outs"))
126+
# directory("{sample}/clusters")
127+
# checkpoint_output = checkpoints.parse_clusters.get(**wildcards).output[0]
128+
# clusters = glob_wildcards(f"{checkpoint_output}/Cluster{{cluster}}_cells.txt").cluster
129+
#print(wildcards)
130+
checkpoint_output = checkpoints.multi.get(**wildcards).output['outs']
131+
SMP = glob_wildcards(os.path.join(checkpoint_output, "{count_sample}", "count", "sample_filtered_feature_bc_matrix")).count_sample
132+
return expand(join(workpath, "seurat", "{sample}", "{count_sample}", "seurat.complete"), **wildcards, count_sample=SMP, sample=lib_samples)
133+
134+
def seuratQCSummaryReport_input(wildcards):
135+
# note 1: ck_output is the same as OUTDIR
136+
137+
# note 2: checkpoints will have attributes for each of the checkpoint
138+
# rules, accessible by name.
139+
#
140+
# outs = directory(join(workpath, "{sample}", "outs", "per_sample_outs"))
141+
# directory("{sample}/clusters")
142+
# checkpoint_output = checkpoints.parse_clusters.get(**wildcards).output[0]
143+
# clusters = glob_wildcards(f"{checkpoint_output}/Cluster{{cluster}}_cells.txt").cluster
144+
checkpoint_output = checkpoints.multi.get(**wildcards).params['outs']
145+
#SMP = glob_wildcards(os.path.join(os.path.dirname(checkpoint_output), "{count_sample}", "seurat.complete")).count_sample
146+
SMP = glob_wildcards(os.path.join(checkpoint_output, "{count_sample}", "count", "sample_filtered_feature_bc_matrix")).count_sample
147+
return expand(join(workpath, "seurat", "{sample}", "{count_sample}", "seur_cluster.rds"), **wildcards, count_sample=SMP)
148+
149+
150+
def get_last_two_path_components(path_str):
151+
"""
152+
Extracts the last two components of a path using os.path.
153+
"""
154+
path_str = os.path.split(path_str)[0]
155+
components = []
156+
head, tail = os.path.split(path_str)
157+
while tail:
158+
components.insert(0, tail)
159+
head, tail = os.path.split(head)
160+
161+
if len(components) >= 2:
162+
return os.path.join(components[-2], components[-1])
163+
elif len(components) == 1:
164+
return components[0]
165+
else:
166+
return ""
167+
168+
def seuratQCSummarySamples(wildcards):
169+
"""
170+
Wrapper to return the sample list into an R friendly input format
171+
"""
172+
lib_samples_expanded = [get_last_two_path_components(i) for i in glob.glob(join(workpath, "seurat", "*", "*", "seur_cluster.rds"))]
173+
#return("c('{}')".format("','".join(lib_samples)))
174+
return("c('{}')".format("','".join(lib_samples_expanded)))
175+
176+
def seuratQC_output(wildcards):
177+
checkpoint_output = checkpoints.multi.get(**wildcards)
178+
print(checkpoint_output)
179+
180+
def seuratQCReportCopy_output(wildcards):
181+
# note 1: ck_output is the same as OUTDIR
182+
183+
# note 2: checkpoints will have attributes for each of the checkpoint
184+
# rules, accessible by name.
185+
#checkpoint_output = checkpoints.multi.get(**wildcards).output['outs']
186+
#SMP = glob_wildcards(os.path.join(checkpoint_output, "{count_sample}", "count", "sample_filtered_feature_bc_matrix")).count_sample
187+
#print(wildcards)
188+
#print(checkpoints)
189+
checkpoint_output = checkpoints.multi.get(**wildcards).output['outs']
190+
if int(CELLRANGER.split('.')[0]) >= 10:
191+
SMP = glob_wildcards(os.path.join(checkpoint_output, "{count_sample}", "sample_filtered_feature_bc_matrix")).count_sample
192+
else:
193+
SMP = glob_wildcards(os.path.join(checkpoint_output, "{count_sample}", "count", "sample_filtered_feature_bc_matrix")).count_sample
194+
#print(checkpoint_output)
195+
#print(SMP)
196+
#SMP = glob_wildcards(os.path.join(os.path.dirname(checkpoint_output), "{count_sample}", "seurat.complete")).count_sample
197+
return expand(join(workpath, "seurat", "{sample}", "{count_sample}", "copySeuratQCReport.complete"), **wildcards, count_sample=SMP)
198+
79199
# Rule definitions
80200
rule librariesCSV:
81201
output:
@@ -115,11 +235,12 @@ rule multiConfig:
115235
{params.flags}
116236
"""
117237

118-
rule multi:
238+
checkpoint multi:
119239
input:
120240
join(workpath, "{sample}.csv")
121241
output:
122-
join(workpath, "{sample}", "outs", "config.csv")
242+
config = join(workpath, "{sample}", "outs", "config.csv"),
243+
outs = directory(join(workpath, "{sample}", "outs", "per_sample_outs"))
123244
log:
124245
err = "run_{sample}_10x_cellranger_count.err",
125246
log ="run_{sample}_10x_cellranger_count.log"
@@ -178,3 +299,151 @@ rule sampleCleanup:
178299
rm -r {params.cr_temp}
179300
fi
180301
"""
302+
303+
rule seuratQC:
304+
input:
305+
data = join(workpath, "{sample}", "outs", "per_sample_outs", "{count_sample}", "sample_filtered_feature_bc_matrix") if int(CELLRANGER.split('.')[0]) >= 10 else join(workpath, "{sample}", "outs", "per_sample_outs", "{count_sample}", "count", "sample_filtered_feature_bc_matrix")
306+
output:
307+
complete = touch(join(workpath, "seurat", "{sample}", "{count_sample}", "seuratQC.complete"))
308+
# rds = join(workpath, "seurat", "{sample}", "{count_sample}", "seur_cluster.rds"),
309+
# cell_filter = join(workpath, "seurat", "{sample}", "{count_sample}", "cell_filter_info.csv"),
310+
# matrix = [
311+
# join(workpath, "seurat", "{sample}", "{count_sample}", "cite-seq-matrix", "export_HTO_matrix.csv"),
312+
# join(workpath, "seurat", "{sample}", "{count_sample}", "cite-seq-matrix", "export_ADT_matrix.csv")
313+
# ]
314+
params:
315+
rname = "seuratQC",
316+
sample = "{count_sample}",
317+
outdir = join(workpath, "seurat", "{sample}", "{count_sample}"),
318+
seurat = join("workflow", "scripts", "seuratCiteSampleQC.R"),
319+
filter = filterFile,
320+
metadata = metadataFile,
321+
log = join(workpath, "seurat", "{sample}", "{count_sample}", "seurat.log")
322+
envmodules: config["tools"]["rversion"]
323+
shell:
324+
"""
325+
unset __RLIBSUSER
326+
unset R_LIBS_USER
327+
328+
Rscript --vanilla {params.seurat} \\
329+
--workdir {params.outdir} \\
330+
--datapath {input.data} \\
331+
--sample {params.sample} \\
332+
--project export \\
333+
{params.filter} \\
334+
{params.metadata} \\
335+
> {params.log}
336+
"""
337+
338+
339+
rule seuratQCReport:
340+
input:
341+
complete = join(workpath, "seurat", "{sample}", "{count_sample}", "seuratQC.complete")
342+
#rds = rules.seuratQC.output.complete,
343+
#cell_filter = rules.seuratQC.output.complete
344+
output:
345+
complete = touch(join(workpath, "seurat", "{sample}", "{count_sample}", "seuratQCReport.complete"))
346+
params:
347+
rname = "seuratQCReport",
348+
sample = "{sample} - {count_sample}",
349+
seuratdir = join(workpath, "seurat", "{sample}", "{count_sample}"),
350+
rds = join(workpath, "seurat", "{sample}", "{count_sample}", "seur_cluster.rds"),
351+
filter = filterFileBool,
352+
tmpdir = tmpdir,
353+
report = join(workpath, "seurat", "{sample}", "{count_sample}_QC_Report.html"),
354+
script = join(workpath, "workflow", "scripts", "seuratCiteSampleQCReport.Rmd")
355+
envmodules: config["tools"]["rversion"]
356+
shell:
357+
"""
358+
unset __RLIBSUSER
359+
unset R_LIBS_USER
360+
361+
if [ -f "{params.rds}" ]; then
362+
cd {params.tmpdir}
363+
cp {params.script} "./{params.sample}.Rmd"
364+
R -e "rmarkdown::render('{params.sample}.Rmd', params=list(seuratdir='{params.seuratdir}', sample='{params.sample}', defaultfilter={params.filter}), output_file='{params.report}')"
365+
fi
366+
"""
367+
368+
rule copySeuratQCReport:
369+
input:
370+
complete = rules.seuratQCReport.output.complete
371+
output:
372+
complete = touch(join(workpath, "seurat", "{sample}", "{count_sample}", "copySeuratQCReport.complete"))
373+
params:
374+
report = rules.seuratQCReport.params.report,
375+
copied_report = join(workpath, "finalreport", "seurat", "{sample} - {count_sample}_QC_Report.html"),
376+
rname = "copySeuratQCReport"
377+
shell:
378+
"""
379+
DIRECTORY=$(dirname "{params.copied_report}")
380+
if [ -f "{params.report}" ]; then
381+
if [[ ! -d $DIRECTORY ]]; then
382+
mkdir -p $DIRECTORY
383+
fi
384+
cp {params.report} '{params.copied_report}'
385+
fi
386+
"""
387+
388+
rule finishAllSeuratQCReportCopy:
389+
input:
390+
report = seuratQCReportCopy_output
391+
output:
392+
complete = join(workpath, "seurat", "{sample}", "SeuratQCReportCopy_{sample}.complete")
393+
params:
394+
rname = "finishAllSeuratQCReportCopy"
395+
shell:
396+
"""
397+
touch {output.complete}
398+
"""
399+
400+
rule cellFilterSummary:
401+
input:
402+
expand(join(workpath, "seurat", "{sample}", "SeuratQCReportCopy_{sample}.complete"), sample=lib_samples)
403+
output:
404+
cell_filter_summary = join(workpath, "Project_Cell_Filters.csv")
405+
params:
406+
rname = "cellFilterSummary",
407+
seuratdir = join(workpath, "seurat", "*"),
408+
filename = "cell_filter_info.csv",
409+
script = join("workflow", "scripts", "cellFilterSummary.R")
410+
envmodules: config["tools"]["rversion"]
411+
shell:
412+
"""
413+
unset __RLIBSUSER
414+
unset R_LIBS_USER
415+
416+
Rscript {params.script} --datapath "{params.seuratdir}" --filename {params.filename} --output {output.cell_filter_summary}
417+
"""
418+
419+
rule seuratQCSummaryReport:
420+
input:
421+
complete = expand(join(workpath, "seurat", "{sample}", "SeuratQCReportCopy_{sample}.complete"), sample=lib_samples),
422+
cell_filter = rules.cellFilterSummary.output.cell_filter_summary
423+
output:
424+
report = join(workpath, "seurat", "Summary_QC_Report.html")
425+
params:
426+
rname = "seuratQCSummaryReport",
427+
samples = seuratQCSummarySamples,
428+
seuratdir = join(workpath, "seurat"),
429+
script = join(workpath, "workflow", "scripts", "seuratCiteSampleQCSummaryReport.Rmd")
430+
envmodules: config["tools"]["rversion"]
431+
shell:
432+
"""
433+
unset __RLIBSUSER
434+
unset R_LIBS_USER
435+
436+
R -e "rmarkdown::render('{params.script}', params=list(seuratdir='{params.seuratdir}', samples={params.samples}, cellfilter='{input.cell_filter}'), output_file='{output.report}')"
437+
"""
438+
439+
rule copySeuratQCSummaryReport:
440+
input:
441+
report = rules.seuratQCSummaryReport.output.report
442+
output:
443+
report = join(workpath, "finalreport", "seurat", "Summary_QC_Report.html")
444+
params:
445+
rname = "copySeuratQCSummaryReport"
446+
shell:
447+
"""
448+
cp {input.report} {output.report}
449+
"""

workflow/scripts/cellFilterSummary.R

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,18 @@ option_list <- list(
1212
opt <- parse_args(OptionParser(option_list=option_list))
1313

1414
filenames <- Sys.glob(file.path(opt$datapath, '*', opt$filename))
15+
foldernames <- filenames |> dirname() |> dirname() |> basename() |> unique()
1516
filters <- read.csv(filenames[1])
1617
filters$Sample <- basename(dirname(filenames[1]))
1718

1819
if(length(filenames) > 1) {
1920
for (filename in filenames[2:length(filenames)]) {
2021
data <- read.csv(filename)
21-
data$Sample <- basename(dirname(filename))
22+
if (length(foldernames) == 1) {
23+
data$Sample <- basename(dirname(filename))
24+
} else {
25+
data$Sample <- sprintf('%s - %s', filename |> dirname() |> dirname() |> basename(), basename(dirname(filename)))
26+
}
2227
filters <- full_join(filters, data)
2328
}
2429
}

0 commit comments

Comments
 (0)