1+ wildcard_constraints :
2+ sample = "|" .join (lib_samples )
3+
14# Pipeline output definition
25
36# Cell Ranger multi output
@@ -16,6 +19,10 @@ pipeline_output += expand(
1619)
1720
1821
22+ #pipeline_output += [join(workpath, "Project_Cell_Filters.csv")]
23+ pipeline_output += expand (join (workpath , "finalreport" , "seurat" , "Summary_QC_Report.html" ), sample = lib_samples )
24+ pipeline_output += expand (join (workpath , "seurat" , "{sample}" , "SeuratQCReportCopy_{sample}.complete" ), sample = lib_samples )
25+
1926# Get set of input paths
2027input_paths = [os .path .dirname (p ) for p in inputs ]
2128input_paths_set = []
@@ -76,6 +83,119 @@ def conditional_flags(wildcards):
7683
7784 return (' ' .join (flags ))
7885
86+ def filterFile (wildcards ):
87+ """
88+ Wrapper to decide whether to provide a filter file for Seurat
89+ QC analysis.
90+ See config['options']['filter'] for the encoded value.
91+ """
92+ if filter_file == "None" :
93+ return ("" )
94+ else :
95+ return (f"--filterfile { filter_file } " )
96+
97+ def filterFileBool (wildcards ):
98+ """
99+ Wrapper to get if a filter file was provided
100+ See config['options']['filter'] for the encoded value.
101+ """
102+ if filter_file == "None" :
103+ return ("TRUE" )
104+ else :
105+ return ("FALSE" )
106+
107+ def metadataFile (wildcards ):
108+ """
109+ Wrapper to decide whether to provide a metadata file for Seurat
110+ QC analysis.
111+ See config['options']['metadata'] for the encoded value.
112+ """
113+ if METADATA_FILE == "None" :
114+ return ("" )
115+ else :
116+ return (f"--metadata { METADATA_FILE } " )
117+
118+
119+ def cellLibraryFilterSummary_input (wildcards ):
120+ # note 1: ck_output is the same as OUTDIR
121+
122+ # note 2: checkpoints will have attributes for each of the checkpoint
123+ # rules, accessible by name.
124+ #
125+ # outs = directory(join(workpath, "{sample}", "outs", "per_sample_outs"))
126+ # directory("{sample}/clusters")
127+ # checkpoint_output = checkpoints.parse_clusters.get(**wildcards).output[0]
128+ # clusters = glob_wildcards(f"{checkpoint_output}/Cluster{{cluster}}_cells.txt").cluster
129+ #print(wildcards)
130+ checkpoint_output = checkpoints .multi .get (** wildcards ).output ['outs' ]
131+ SMP = glob_wildcards (os .path .join (checkpoint_output , "{count_sample}" , "count" , "sample_filtered_feature_bc_matrix" )).count_sample
132+ return expand (join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seurat.complete" ), ** wildcards , count_sample = SMP , sample = lib_samples )
133+
134+ def seuratQCSummaryReport_input (wildcards ):
135+ # note 1: ck_output is the same as OUTDIR
136+
137+ # note 2: checkpoints will have attributes for each of the checkpoint
138+ # rules, accessible by name.
139+ #
140+ # outs = directory(join(workpath, "{sample}", "outs", "per_sample_outs"))
141+ # directory("{sample}/clusters")
142+ # checkpoint_output = checkpoints.parse_clusters.get(**wildcards).output[0]
143+ # clusters = glob_wildcards(f"{checkpoint_output}/Cluster{{cluster}}_cells.txt").cluster
144+ checkpoint_output = checkpoints .multi .get (** wildcards ).params ['outs' ]
145+ #SMP = glob_wildcards(os.path.join(os.path.dirname(checkpoint_output), "{count_sample}", "seurat.complete")).count_sample
146+ SMP = glob_wildcards (os .path .join (checkpoint_output , "{count_sample}" , "count" , "sample_filtered_feature_bc_matrix" )).count_sample
147+ return expand (join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seur_cluster.rds" ), ** wildcards , count_sample = SMP )
148+
149+
150+ def get_last_two_path_components (path_str ):
151+ """
152+ Extracts the last two components of a path using os.path.
153+ """
154+ path_str = os .path .split (path_str )[0 ]
155+ components = []
156+ head , tail = os .path .split (path_str )
157+ while tail :
158+ components .insert (0 , tail )
159+ head , tail = os .path .split (head )
160+
161+ if len (components ) >= 2 :
162+ return os .path .join (components [- 2 ], components [- 1 ])
163+ elif len (components ) == 1 :
164+ return components [0 ]
165+ else :
166+ return ""
167+
168+ def seuratQCSummarySamples (wildcards ):
169+ """
170+ Wrapper to return the sample list into an R friendly input format
171+ """
172+ lib_samples_expanded = [get_last_two_path_components (i ) for i in glob .glob (join (workpath , "seurat" , "*" , "*" , "seur_cluster.rds" ))]
173+ #return("c('{}')".format("','".join(lib_samples)))
174+ return ("c('{}')" .format ("','" .join (lib_samples_expanded )))
175+
176+ def seuratQC_output (wildcards ):
177+ checkpoint_output = checkpoints .multi .get (** wildcards )
178+ print (checkpoint_output )
179+
180+ def seuratQCReportCopy_output (wildcards ):
181+ # note 1: ck_output is the same as OUTDIR
182+
183+ # note 2: checkpoints will have attributes for each of the checkpoint
184+ # rules, accessible by name.
185+ #checkpoint_output = checkpoints.multi.get(**wildcards).output['outs']
186+ #SMP = glob_wildcards(os.path.join(checkpoint_output, "{count_sample}", "count", "sample_filtered_feature_bc_matrix")).count_sample
187+ #print(wildcards)
188+ #print(checkpoints)
189+ checkpoint_output = checkpoints .multi .get (** wildcards ).output ['outs' ]
190+ if int (CELLRANGER .split ('.' )[0 ]) >= 10 :
191+ SMP = glob_wildcards (os .path .join (checkpoint_output , "{count_sample}" , "sample_filtered_feature_bc_matrix" )).count_sample
192+ else :
193+ SMP = glob_wildcards (os .path .join (checkpoint_output , "{count_sample}" , "count" , "sample_filtered_feature_bc_matrix" )).count_sample
194+ #print(checkpoint_output)
195+ #print(SMP)
196+ #SMP = glob_wildcards(os.path.join(os.path.dirname(checkpoint_output), "{count_sample}", "seurat.complete")).count_sample
197+ return expand (join (workpath , "seurat" , "{sample}" , "{count_sample}" , "copySeuratQCReport.complete" ), ** wildcards , count_sample = SMP )
198+
79199# Rule definitions
80200rule librariesCSV :
81201 output :
@@ -115,11 +235,12 @@ rule multiConfig:
115235 {params.flags}
116236 """
117237
118- rule multi :
238+ checkpoint multi :
119239 input :
120240 join (workpath , "{sample}.csv" )
121241 output :
122- join (workpath , "{sample}" , "outs" , "config.csv" )
242+ config = join (workpath , "{sample}" , "outs" , "config.csv" ),
243+ outs = directory (join (workpath , "{sample}" , "outs" , "per_sample_outs" ))
123244 log :
124245 err = "run_{sample}_10x_cellranger_count.err" ,
125246 log = "run_{sample}_10x_cellranger_count.log"
@@ -178,3 +299,151 @@ rule sampleCleanup:
178299 rm -r {params.cr_temp}
179300 fi
180301 """
302+
303+ rule seuratQC :
304+ input :
305+ data = join (workpath , "{sample}" , "outs" , "per_sample_outs" , "{count_sample}" , "sample_filtered_feature_bc_matrix" ) if int (CELLRANGER .split ('.' )[0 ]) >= 10 else join (workpath , "{sample}" , "outs" , "per_sample_outs" , "{count_sample}" , "count" , "sample_filtered_feature_bc_matrix" )
306+ output :
307+ complete = touch (join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seuratQC.complete" ))
308+ # rds = join(workpath, "seurat", "{sample}", "{count_sample}", "seur_cluster.rds"),
309+ # cell_filter = join(workpath, "seurat", "{sample}", "{count_sample}", "cell_filter_info.csv"),
310+ # matrix = [
311+ # join(workpath, "seurat", "{sample}", "{count_sample}", "cite-seq-matrix", "export_HTO_matrix.csv"),
312+ # join(workpath, "seurat", "{sample}", "{count_sample}", "cite-seq-matrix", "export_ADT_matrix.csv")
313+ # ]
314+ params :
315+ rname = "seuratQC" ,
316+ sample = "{count_sample}" ,
317+ outdir = join (workpath , "seurat" , "{sample}" , "{count_sample}" ),
318+ seurat = join ("workflow" , "scripts" , "seuratCiteSampleQC.R" ),
319+ filter = filterFile ,
320+ metadata = metadataFile ,
321+ log = join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seurat.log" )
322+ envmodules : config ["tools" ]["rversion" ]
323+ shell :
324+ """
325+ unset __RLIBSUSER
326+ unset R_LIBS_USER
327+
328+ Rscript --vanilla {params.seurat} \\
329+ --workdir {params.outdir} \\
330+ --datapath {input.data} \\
331+ --sample {params.sample} \\
332+ --project export \\
333+ {params.filter} \\
334+ {params.metadata} \\
335+ > {params.log}
336+ """
337+
338+
339+ rule seuratQCReport :
340+ input :
341+ complete = join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seuratQC.complete" )
342+ #rds = rules.seuratQC.output.complete,
343+ #cell_filter = rules.seuratQC.output.complete
344+ output :
345+ complete = touch (join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seuratQCReport.complete" ))
346+ params :
347+ rname = "seuratQCReport" ,
348+ sample = "{sample} - {count_sample}" ,
349+ seuratdir = join (workpath , "seurat" , "{sample}" , "{count_sample}" ),
350+ rds = join (workpath , "seurat" , "{sample}" , "{count_sample}" , "seur_cluster.rds" ),
351+ filter = filterFileBool ,
352+ tmpdir = tmpdir ,
353+ report = join (workpath , "seurat" , "{sample}" , "{count_sample}_QC_Report.html" ),
354+ script = join (workpath , "workflow" , "scripts" , "seuratCiteSampleQCReport.Rmd" )
355+ envmodules : config ["tools" ]["rversion" ]
356+ shell :
357+ """
358+ unset __RLIBSUSER
359+ unset R_LIBS_USER
360+
361+ if [ -f "{params.rds}" ]; then
362+ cd {params.tmpdir}
363+ cp {params.script} "./{params.sample}.Rmd"
364+ R -e "rmarkdown::render('{params.sample}.Rmd', params=list(seuratdir='{params.seuratdir}', sample='{params.sample}', defaultfilter={params.filter}), output_file='{params.report}')"
365+ fi
366+ """
367+
368+ rule copySeuratQCReport :
369+ input :
370+ complete = rules .seuratQCReport .output .complete
371+ output :
372+ complete = touch (join (workpath , "seurat" , "{sample}" , "{count_sample}" , "copySeuratQCReport.complete" ))
373+ params :
374+ report = rules .seuratQCReport .params .report ,
375+ copied_report = join (workpath , "finalreport" , "seurat" , "{sample} - {count_sample}_QC_Report.html" ),
376+ rname = "copySeuratQCReport"
377+ shell :
378+ """
379+ DIRECTORY=$(dirname "{params.copied_report}")
380+ if [ -f "{params.report}" ]; then
381+ if [[ ! -d $DIRECTORY ]]; then
382+ mkdir -p $DIRECTORY
383+ fi
384+ cp {params.report} '{params.copied_report}'
385+ fi
386+ """
387+
388+ rule finishAllSeuratQCReportCopy :
389+ input :
390+ report = seuratQCReportCopy_output
391+ output :
392+ complete = join (workpath , "seurat" , "{sample}" , "SeuratQCReportCopy_{sample}.complete" )
393+ params :
394+ rname = "finishAllSeuratQCReportCopy"
395+ shell :
396+ """
397+ touch {output.complete}
398+ """
399+
400+ rule cellFilterSummary :
401+ input :
402+ expand (join (workpath , "seurat" , "{sample}" , "SeuratQCReportCopy_{sample}.complete" ), sample = lib_samples )
403+ output :
404+ cell_filter_summary = join (workpath , "Project_Cell_Filters.csv" )
405+ params :
406+ rname = "cellFilterSummary" ,
407+ seuratdir = join (workpath , "seurat" , "*" ),
408+ filename = "cell_filter_info.csv" ,
409+ script = join ("workflow" , "scripts" , "cellFilterSummary.R" )
410+ envmodules : config ["tools" ]["rversion" ]
411+ shell :
412+ """
413+ unset __RLIBSUSER
414+ unset R_LIBS_USER
415+
416+ Rscript {params.script} --datapath "{params.seuratdir}" --filename {params.filename} --output {output.cell_filter_summary}
417+ """
418+
419+ rule seuratQCSummaryReport :
420+ input :
421+ complete = expand (join (workpath , "seurat" , "{sample}" , "SeuratQCReportCopy_{sample}.complete" ), sample = lib_samples ),
422+ cell_filter = rules .cellFilterSummary .output .cell_filter_summary
423+ output :
424+ report = join (workpath , "seurat" , "Summary_QC_Report.html" )
425+ params :
426+ rname = "seuratQCSummaryReport" ,
427+ samples = seuratQCSummarySamples ,
428+ seuratdir = join (workpath , "seurat" ),
429+ script = join (workpath , "workflow" , "scripts" , "seuratCiteSampleQCSummaryReport.Rmd" )
430+ envmodules : config ["tools" ]["rversion" ]
431+ shell :
432+ """
433+ unset __RLIBSUSER
434+ unset R_LIBS_USER
435+
436+ R -e "rmarkdown::render('{params.script}', params=list(seuratdir='{params.seuratdir}', samples={params.samples}, cellfilter='{input.cell_filter}'), output_file='{output.report}')"
437+ """
438+
439+ rule copySeuratQCSummaryReport :
440+ input :
441+ report = rules .seuratQCSummaryReport .output .report
442+ output :
443+ report = join (workpath , "finalreport" , "seurat" , "Summary_QC_Report.html" )
444+ params :
445+ rname = "copySeuratQCSummaryReport"
446+ shell :
447+ """
448+ cp {input.report} {output.report}
449+ """
0 commit comments