Skip to content

Commit d0f5035

Browse files
committed
--test
1 parent 81cfd87 commit d0f5035

File tree

2 files changed

+244
-3
lines changed

inputs.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
"SVShell.intron_reference": "gs://broad-dsde-methods-vj/sv-shell-inputs/gencode.v39.CDS.intron.tsv.gz",
105105
"SVShell.par_bed": "gs://broad-dsde-methods-vj/sv-shell-inputs/hg38.par.bed",
106106
"SVShell.sample_bincov_matrix": "TODO_MAYBE_NOT_NEEEDED",
107-
"SVShell.sample_bincov_matrix_idx": "gs://broad-dsde-methods-vj/sv-shell-inputs/NA12878.RD.txt.gz.tbi",
107+
"SVShell.sample_bincov_matrix_idx": "TODO: delete this one too",
108108
"SVShell.PE_metrics": "gs://broad-dsde-methods-vj/sv-shell-inputs/all_samples.pe_metric_file.txt",
109109
"SVShell.SR_metrics": "gs://broad-dsde-methods-vj/sv-shell-inputs/all_samples.sr_metric_file.txt",
110110
"SVShell.rmsk": "gs://broad-dsde-methods-vj/sv-shell-inputs/hg38.randomForest_blacklist.withRepMask.bed.gz",

wdl/SVShell.wdl

Lines changed: 243 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,30 @@ workflow SVShell {
66
input {
77
File gcnv_model_tars_list
88
File ref_pesr_split_files_list
9+
File ref_pesr_disc_files_list
10+
File ref_pesr_sd_files_list
11+
File pesr_exclude_intervals
12+
File ref_panel_vcf
13+
File ref_panel_bincov_matrix
14+
File bin_exclude
15+
File cytobands
16+
File depth_exclude_list
17+
Array[File] genome_tracks
18+
File mei_bed
19+
File manta_region_bed
20+
File HERVK_reference
21+
File LINE1_reference
22+
File intron_reference
23+
File? outlier_samples_list
24+
File? par_bed
25+
File PE_metrics
26+
File SR_metrics
27+
File rmsk
28+
File segdups
29+
File genotype_pesr_depth_sepcutoff
30+
File genotype_pesr_pesr_sepcutoff
31+
File genotype_depth_depth_sepcutoff
32+
File genotype_depth_pesr_sepcutoff
933
}
1034

1135
Array[File] gcnv_model_tars = read_lines(gcnv_model_tars_list)
@@ -16,31 +40,232 @@ workflow SVShell {
1640
}
1741
Array[File] ref_pesr_split_file_indices = ref_pesr_split_file_index
1842

43+
Array[File] ref_pesr_disc_files = read_lines(ref_pesr_disc_files_list)
44+
scatter (ref_pesr_disc_file in ref_pesr_disc_files) {
45+
File ref_pesr_disc_file_index = ref_pesr_disc_file + ".tbi"
46+
}
47+
Array[File] ref_pesr_disc_file_indices = ref_pesr_disc_file_index
48+
49+
Array[File] ref_pesr_sd_files = read_lines(ref_pesr_sd_files_list)
50+
scatter (ref_pesr_sd_file in ref_pesr_sd_files) {
51+
File ref_pesr_sd_file_index = ref_pesr_sd_file + ".tbi"
52+
}
53+
Array[File] ref_pesr_sd_file_indices = ref_pesr_sd_file_index
54+
55+
scatter (genome_track in genome_tracks) {
56+
File genome_track_index = genome_track + ".tbi"
57+
}
58+
Array[File] genome_tracks_indices = genome_track_index
59+
60+
File pesr_exclude_intervals_index = pesr_exclude_intervals + ".tbi"
61+
File ref_panel_vcf_index = ref_panel_vcf + ".tbi"
62+
File ref_panel_bincov_matrix_index = ref_panel_bincov_matrix + ".tbi"
63+
File bin_exclude_index = bin_exclude + ".tbi"
64+
File cytobands_index = cytobands + ".tbi"
65+
File depth_exclude_list_index = depth_exclude_list + ".tbi"
66+
File mei_bed_index = mei_bed + ".tbi"
67+
File manta_region_bed_index = manta_region_bed + ".tbi"
1968

2069
call RunSVShell {
2170
input:
2271
gcnv_model_tars = gcnv_model_tars,
2372
ref_pesr_split_files = ref_pesr_split_files,
24-
ref_pesr_split_files_indices = ref_pesr_split_file_indices
73+
ref_pesr_disc_files = ref_pesr_disc_files,
74+
ref_pesr_sd_files = ref_pesr_sd_files,
75+
ref_pesr_split_files_indices = ref_pesr_split_file_indices,
76+
ref_pesr_disc_files_indices = ref_pesr_disc_file_indices,
77+
ref_pesr_sd_files_indices = ref_pesr_sd_file_indices,
78+
genome_tracks = genome_tracks,
79+
genome_tracks_indices = genome_tracks_indices,
80+
pesr_exclude_intervals = pesr_exclude_intervals,
81+
pesr_exclude_intervals_index = pesr_exclude_intervals_index,
82+
ref_panel_vcf = ref_panel_vcf,
83+
ref_panel_vcf_index = ref_panel_vcf_index,
84+
ref_panel_bincov_matrix = ref_panel_bincov_matrix,
85+
ref_panel_bincov_matrix_index = ref_panel_bincov_matrix_index,
86+
bin_exclude = bin_exclude,
87+
bin_exclude_index = bin_exclude_index,
88+
cytobands = cytobands,
89+
cytobands_index = cytobands_index,
90+
depth_exclude_list = depth_exclude_list,
91+
depth_exclude_list_index = depth_exclude_list_index,
92+
mei_bed = mei_bed,
93+
mei_bed_index = mei_bed_index,
94+
manta_region_bed = manta_region_bed,
95+
manta_region_bed_index = manta_region_bed_index,
96+
HERVK_reference = HERVK_reference,
97+
LINE1_reference = LINE1_reference,
98+
intron_reference = intron_reference,
99+
outlier_samples_list = outlier_samples_list,
100+
par_bed = par_bed,
101+
PE_metrics = PE_metrics,
102+
SR_metrics = SR_metrics,
103+
rmsk = rmsk,
104+
segdups = segdups,
105+
genotype_pesr_depth_sepcutoff = genotype_pesr_depth_sepcutoff,
106+
genotype_pesr_pesr_sepcutoff = genotype_pesr_pesr_sepcutoff,
107+
genotype_depth_depth_sepcutoff = genotype_depth_depth_sepcutoff,
108+
genotype_depth_pesr_sepcutoff = genotype_depth_pesr_sepcutoff,
25109
}
26110
27111
28112
output {
29113
File inputs_json = RunSVShell.inputs_json
114+
# File outputs_json = RunSVShell.outputs_json
115+
# File final_vcf = RunSVShell.final_vcf
116+
# File final_vcf_idx = RunSVShell.final_vcf_idx
117+
# File pre_cleanup_vcf = RunSVShell.pre_cleanup_vcf
118+
# File pre_cleanup_vcf_idx = RunSVShell.pre_cleanup_vcf_idx
119+
# File stripy_json_output = RunSVShell.stripy_json_output
120+
# File stripy_tsv_output = RunSVShell.stripy_tsv_output
121+
# File stripy_html_output = RunSVShell.stripy_html_output
122+
# File stripy_vcf_output = RunSVShell.stripy_vcf_output
123+
# File metrics_file = RunSVShell.metrics_file
124+
# File qc_file = RunSVShell.qc_file
125+
# File ploidy_matrix = RunSVShell.ploidy_matrix
126+
# File ploidy_plots = RunSVShell.ploidy_plots
127+
# File non_genotyped_unique_depth_calls = RunSVShell.non_genotyped_unique_depth_calls
30128
}
31129
}
32130

33131
task RunSVShell {
34132
input {
35133
String batch
134+
String sample_id
135+
File ref_samples_list
136+
File ref_ped_file
137+
File genome_file
138+
File primary_contigs_list
139+
File primary_contigs_fai
140+
File reference_fasta
141+
File reference_index
142+
File reference_dict
143+
File ref_panel_vcf
144+
File autosome_file
145+
File allosome_file
146+
File bam_or_cram_file
147+
File bam_or_cram_index
148+
File preprocessed_intervals
149+
File manta_region_bed
150+
File manta_region_bed_index
151+
File sd_locs_vcf
152+
File wham_include_list_bed_file
153+
File reference_bwa_alt
154+
File reference_bwa_amb
155+
File reference_bwa_ann
156+
File reference_bwa_bwt
157+
File reference_bwa_pac
158+
File reference_bwa_sa
159+
Boolean run_vcf_qc
160+
File wgd_scoring_mask
161+
Int min_svsize
162+
File contig_ploidy_model_tar
36163
Array[File] gcnv_model_tars
164+
File ref_panel_bincov_matrix
165+
Array[File] ref_pesr_disc_files
37166
Array[File] ref_pesr_split_files
167+
Array[File] ref_pesr_sd_files
38168
Array[File] ref_pesr_split_files_indices
39-
169+
Array[File] ref_pesr_disc_files_indices
170+
Array[File] ref_pesr_sd_files_indices
171+
Array[File] genome_tracks_indices
172+
File pesr_exclude_intervals_index
173+
File ref_panel_vcf_index
174+
File ref_panel_bincov_matrix_index
175+
File bin_exclude_index
176+
File cytobands_index
177+
File depth_exclude_list_index
178+
File mei_bed_index
179+
Int ref_copy_number_autosomal_contigs
180+
Int gcnv_qs_cutoff
181+
File cnmops_exclude_list
182+
Int matrix_qc_distance
183+
File? ref_std_manta_vcf_tar
184+
File? ref_std_scramble_vcf_tar
185+
File? ref_std_wham_vcf_tar
186+
File ref_panel_del_bed
187+
File ref_panel_dup_bed
188+
File depth_exclude_list
189+
Float depth_exclude_overlap_fraction
190+
Float depth_interval_overlap
191+
String? depth_clustering_algorithm
192+
File pesr_exclude_intervals
193+
Float pesr_interval_overlap
194+
String? pesr_clustering_algorithm
195+
File cutoffs
196+
File genotyping_rd_table
197+
File genotyping_pe_table
198+
File genotyping_sr_table
199+
File bin_exclude
200+
Float clean_vcf_min_sr_background_fail_batches
201+
File clustering_config_part1
202+
File stratification_config_part1
203+
File clustering_config_part2
204+
File stratification_config_part2
205+
Array[String] clustering_track_names
206+
Array[File] clustering_track_bed_files
207+
File cytobands
208+
File mei_bed
209+
Int max_shard_size_resolve
210+
String chr_x
211+
String chr_y
212+
File protein_coding_gtf
213+
File noncoding_bed
214+
Int annotation_sv_per_shard
215+
File? external_af_ref_bed
216+
String? external_af_ref_bed_prefix
217+
Array[String]? external_af_population
218+
Int min_pe_cpx
219+
Int min_pe_ctx
220+
File gq_recalibrator_model_file
221+
Array[String] recalibrate_gq_args
222+
Array[File] genome_tracks
223+
Float no_call_rate_cutoff
224+
File sl_cutoff_table
225+
String? sl_filter_args
226+
File qc_definitions
227+
File ref_panel_median_cov
228+
File HERVK_reference
229+
File LINE1_reference
230+
File intron_reference
231+
File? outlier_samples_list
232+
File? par_bed
233+
Boolean run_sampleevidence_metrics
234+
File PE_metrics
235+
File SR_metrics
236+
File rmsk
237+
File segdups
238+
File genotype_pesr_depth_sepcutoff
239+
File genotype_pesr_pesr_sepcutoff
240+
File genotype_depth_depth_sepcutoff
241+
File genotype_depth_pesr_sepcutoff
242+
Int genotyping_n_per_split
243+
Int n_RD_genotype_bins
244+
Int clean_vcf1b_records_per_shard
245+
Int clean_vcf5_records_per_shard
246+
Int clean_vcf_max_shards_per_chrom_clean_vcf_step1
247+
Int clean_vcf_min_records_per_shard_clean_vcf_step1
248+
Int clean_vcf_random_seed
249+
Int clean_vcf_samples_per_clean_vcf_step2_shard
250+
Int refine_complex_variants_n_per_split
40251
String sv_shell_docker
41252
RuntimeAttr? runtime_attr_override
42253
}
43254

255+
String final_vcf_filename = sample_id + ".vcf.gz"
256+
String final_vcf_idx_filename = final_vcf_filename + ".tbi"
257+
String pre_cleanup_vcf_filename = batch + ".annotated.vcf.gz"
258+
String pre_cleanup_vcf_idx_filename = pre_cleanup_vcf_filename + ".tbi"
259+
String stripy_json_filename = sample_id + ".stripy.json"
260+
String stripy_tsv_filename = sample_id + ".stripy.tsv"
261+
String stripy_html_filename = sample_id + ".stripy.html"
262+
String stripy_vcf_filename = sample_id + ".stripy.vcf"
263+
String metrics_filename = "single_sample." + batch + ".metrics.tsv"
264+
String qc_filename = "sv_qc." + batch + ".tsv"
265+
String ploidy_matrix_filename = batch + "_ploidy_matrix.bed.gz"
266+
String ploidy_plots_filename = batch + "_ploidy_plots.tar.gz"
267+
String non_genotyped_unique_depth_calls_filename = batch + ".non_genotyped_unique_depth_calls.vcf.gz"
268+
44269
command <<<
45270
set -Exeuo pipefail
46271
@@ -50,10 +275,26 @@ task RunSVShell {
50275
mkdir -p "${PWD}/wd/tmp"
51276
52277
touch single_sample_pipeline_inputs.json
278+
279+
df -h
53280
>>>
54281

55282
output {
56283
File inputs_json = "single_sample_pipeline_inputs.json"
284+
# File outputs_json = "single_sample_pipeline_outputs.json"
285+
# File final_vcf = final_vcf_filename
286+
# File final_vcf_idx = final_vcf_idx_filename
287+
# File pre_cleanup_vcf = pre_cleanup_vcf_filename
288+
# File pre_cleanup_vcf_idx = pre_cleanup_vcf_idx_filename
289+
# File stripy_json_output = stripy_json_filename
290+
# File stripy_tsv_output = stripy_tsv_filename
291+
# File stripy_html_output = stripy_html_filename
292+
# File stripy_vcf_output = stripy_vcf_filename
293+
# File metrics_file = metrics_filename
294+
# File qc_file = qc_filename
295+
# File ploidy_matrix = ploidy_matrix_filename
296+
# File ploidy_plots = ploidy_plots_filename
297+
# File non_genotyped_unique_depth_calls = non_genotyped_unique_depth_calls_filename
57298
}
58299

59300
RuntimeAttr default_attr = object {

0 commit comments

Comments
 (0)