-
Notifications
You must be signed in to change notification settings - Fork 115
Expand file tree
/
Copy pathSlideSeq.wdl
More file actions
218 lines (190 loc) · 8.67 KB
/
SlideSeq.wdl
File metadata and controls
218 lines (190 loc) · 8.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
version 1.0
import "../../../tasks/wdl/StarAlign.wdl" as StarAlign
import "../../../tasks/wdl/FastqProcessing.wdl" as FastqProcessing
import "../../../tasks/wdl/Metrics.wdl" as Metrics
import "../../../tasks/wdl/H5adUtils.wdl" as H5adUtils
import "../../../tasks/wdl/CheckInputs.wdl" as OptimusInputChecks
import "../../../tasks/wdl/MergeSortBam.wdl" as Merge
import "../../../tasks/wdl/Utilities.wdl" as utils
## Copyright Broad Institute, 2022
##
## This WDL pipeline implements data processing for RNA with UMIs
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script.
workflow SlideSeq {
  # SlideSeq workflow.
  #
  # Flow, as wired below:
  #   1. Validate `cloud_provider` and pick docker registry prefixes from it.
  #   2. Record the STAR reference version (ReferenceCheck).
  #   3. Compute FASTQ-level metrics (FastqMetrics) and split the FASTQs (SplitFastq).
  #   4. Run STARsolo once per split FASTQ pair (scatter).
  #   5. Merge the per-shard BAMs and compute gene / UMI / cell metrics on the merged BAM.
  #   6. Merge the per-shard count matrices and build one h5ad; which h5ad task runs
  #      depends on `count_exons`.

  String pipeline_version = "3.6.5"

  input {
    Array[File] r1_fastq
    Array[File] r2_fastq
    Array[File]? i1_fastq
    String input_id
    String read_structure
    File tar_star_reference
    File annotations_gtf
    String output_bam_basename
    Boolean count_exons = true
    File bead_locations
    String cloud_provider
  }

  # docker images (name:tag only; the registry prefix is prepended below)
  # NOTE(review): pytools_docker is not referenced anywhere in this workflow body.
  String pytools_docker = "pytools:1.0.0-1661263730"
  String picard_cloud_docker = "picard-cloud:2.26.10"
  String warp_tools_docker = "warp-tools:2.6.1"
  String star_merge_docker = "star-merge-npz:1.3.0"

  String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
  String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
  String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
  # Any value other than "gcp" selects the Azure prefix; invalid values are
  # rejected separately by the cloud_provider check further down.
  String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix

  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"

  # choose docker prefix based on cloud provider
  String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

  # make sure either "gcp" or "azure" is supplied as the cloud_provider input
  # NOTE(review): no other call depends on this one, so presumably the engine may
  # start other tasks before this error is raised -- confirm if that matters.
  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
      input:
        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
    }
  }

  parameter_meta {
    r1_fastq: "Array of Read 1 FASTQ files; forward read; contains cell barcodes and molecule barcodes"
    r2_fastq: "Array of Read 2 FASTQ files; reverse read; contains cDNA fragment generated from captured mRNA"
    i1_fastq: "Optional array of i1 FASTQ files; index read used for demultiplexing of multiple samples on one flow cell"
    input_id: "Name of sample matching this file; inserted into read group header"
    read_structure: "String used to specify the UMI (M) and Barcode (C) positions in the Read 1 FASTQ"
    tar_star_reference: "TAR file containing the STAR reference; passed to the reference version check and to STARsolo alignment"
    annotations_gtf: "GTF annotation file; used for gene, UMI, and cell metrics and for h5ad generation"
    output_bam_basename: "Basename for BAM outputs; per-shard BAMs are named <basename>_<shard index> and the merged BAM is named <basename>.bam"
    count_exons: "If true (default), additionally merge the STARsolo sn_rna matrices and build the h5ad with SingleNucleusSlideseqH5adOutput; if false, build the h5ad with SlideseqH5adGeneration"
    bead_locations: "Bead locations file; supplied as the barcode whitelist to FASTQ metrics, FASTQ splitting, and STARsolo"
    cloud_provider: "Cloud provider the workflow runs on; must be 'gcp' or 'azure'; selects the docker registry prefix"
  }

  # Record the version of the STAR reference that was supplied.
  call StarAlign.STARGenomeRefVersion as ReferenceCheck {
    input:
      tar_star_reference = tar_star_reference,
      ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
  }

  # NOTE(review): FastqMetrics, SplitFastq, STARsoloFastqSlideSeq and UMIsMetrics are
  # called without a docker path, so they use whatever image the task itself
  # declares regardless of cloud_provider -- confirm this is intended.
  call Metrics.FastqMetricsSlideSeq as FastqMetrics {
    input:
      r1_fastq = r1_fastq,
      read_structure = read_structure,
      sample_id = input_id,
      whitelist = bead_locations
  }

  call FastqProcessing.FastqProcessingSlidSeq as SplitFastq {
    input:
      r1_fastq = r1_fastq,
      r2_fastq = r2_fastq,
      i1_fastq = i1_fastq,
      read_structure = read_structure,
      sample_id = input_id,
      whitelist = bead_locations
  }

  # One STARsolo run per split FASTQ pair. R1 and R2 are indexed with the same
  # idx, so the two output arrays are assumed to be the same length and order.
  scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
    call StarAlign.STARsoloFastqSlideSeq as STARsoloFastqSlideSeq {
      input:
        r1_fastq = [SplitFastq.fastq_R1_output_array[idx]],
        r2_fastq = [SplitFastq.fastq_R2_output_array[idx]],
        whitelist = bead_locations,
        tar_star_reference = tar_star_reference,
        # suffix with the shard index so each shard gets a distinct basename
        output_bam_basename = output_bam_basename + "_" + idx,
        read_structure = read_structure,
        count_exons = count_exons
    }
  }

  # Combine the per-shard BAMs into a single coordinate-sorted BAM.
  call Merge.MergeSortBamFiles as MergeBam {
    input:
      bam_inputs = STARsoloFastqSlideSeq.bam_output,
      output_bam_filename = output_bam_basename + ".bam",
      sort_order = "coordinate",
      picard_cloud_docker_path = docker_prefix + picard_cloud_docker
  }

  # Metrics are all computed on the merged BAM.
  call Metrics.CalculateGeneMetrics as GeneMetrics {
    input:
      bam_input = MergeBam.output_bam,
      original_gtf = annotations_gtf,
      input_id = input_id,
      warp_tools_docker_path = docker_prefix + warp_tools_docker
  }

  call Metrics.CalculateUMIsMetrics as UMIsMetrics {
    input:
      bam_input = MergeBam.output_bam,
      original_gtf = annotations_gtf,
      input_id = input_id
  }

  call Metrics.CalculateCellMetrics as CellMetrics {
    input:
      bam_input = MergeBam.output_bam,
      original_gtf = annotations_gtf,
      input_id = input_id,
      warp_tools_docker_path = docker_prefix + warp_tools_docker
  }

  # Merge the per-shard count matrices. This runs for both values of count_exons;
  # its outputs are workflow outputs and feed both h5ad branches.
  call StarAlign.MergeStarOutput as MergeStarOutputs {
    input:
      barcodes = STARsoloFastqSlideSeq.barcodes,
      features = STARsoloFastqSlideSeq.features,
      matrix = STARsoloFastqSlideSeq.matrix,
      input_id = input_id,
      star_merge_docker_path = docker_prefix + star_merge_docker
  }

  # Branch 1 (count_exons == false): h5ad from the merged matrix only.
  if ( !count_exons ) {
    call H5adUtils.SlideseqH5adGeneration as SlideseqH5adGeneration {
      input:
        input_id = input_id,
        annotation_file = annotations_gtf,
        cell_metrics = CellMetrics.cell_metrics,
        gene_metrics = GeneMetrics.gene_metrics,
        sparse_count_matrix = MergeStarOutputs.sparse_counts,
        cell_id = MergeStarOutputs.row_index,
        gene_id = MergeStarOutputs.col_index,
        add_emptydrops_data = "no",
        pipeline_version = "SlideSeq_v~{pipeline_version}",
        warp_tools_docker_path = docker_prefix + warp_tools_docker
    }
  }

  # Branch 2 (count_exons == true): also merge the sn_rna matrices and build the
  # h5ad from both the regular and the sn_rna ("exon") matrices.
  if (count_exons) {
    call StarAlign.MergeStarOutput as MergeStarOutputsExons {
      input:
        barcodes = STARsoloFastqSlideSeq.barcodes_sn_rna,
        features = STARsoloFastqSlideSeq.features_sn_rna,
        matrix = STARsoloFastqSlideSeq.matrix_sn_rna,
        input_id = input_id,
        star_merge_docker_path = docker_prefix + star_merge_docker
    }

    call H5adUtils.SingleNucleusSlideseqH5adOutput as SlideseqH5adGenerationWithExons {
      input:
        input_id = input_id,
        annotation_file = annotations_gtf,
        cell_metrics = CellMetrics.cell_metrics,
        gene_metrics = GeneMetrics.gene_metrics,
        sparse_count_matrix = MergeStarOutputs.sparse_counts,
        cell_id = MergeStarOutputs.row_index,
        gene_id = MergeStarOutputs.col_index,
        sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts,
        cell_id_exon = MergeStarOutputsExons.row_index,
        gene_id_exon = MergeStarOutputsExons.col_index,
        pipeline_version = "SlideSeq_v~{pipeline_version}",
        warp_tools_docker_path = docker_prefix + warp_tools_docker
    }
  }

  # The two branches above are guarded by count_exons and !count_exons, so exactly
  # one of these optional values is defined and select_first always has a result.
  File final_h5ad_output = select_first([SlideseqH5adGenerationWithExons.h5ad_output, SlideseqH5adGeneration.h5ad_output])

  output {
    String pipeline_version_out = pipeline_version
    File genomic_reference_version = ReferenceCheck.genomic_ref_version
    File bam = MergeBam.output_bam

    # sparse count matrix
    File matrix = MergeStarOutputs.sparse_counts
    File matrix_row_index = MergeStarOutputs.row_index
    File matrix_col_index = MergeStarOutputs.col_index

    # metrics computed on the merged BAM
    File cell_metrics = CellMetrics.cell_metrics
    File gene_metrics = GeneMetrics.gene_metrics
    File umi_metrics = UMIsMetrics.umi_metrics

    # metrics computed on the input FASTQs
    File fastq_barcode_distribution = FastqMetrics.barcode_distribution
    File fastq_umi_distribution = FastqMetrics.umi_distribution
    File fastq_reads_per_cell = FastqMetrics.numReads_perCell
    File fastq_reads_per_umi = FastqMetrics.numReads_perUMI

    # h5ad
    File? h5ad_output_file = final_h5ad_output
  }
}